1//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7//===----------------------------------------------------------------------===// 9// This implements the TargetLowering class. 11//===----------------------------------------------------------------------===// 39/// NOTE: The TargetMachine owns TLOF. 51/// Check whether a given call node is in tail position within its function. If 52/// so, it sets Chain to the input chain of the tail call. 57// First, check if tail calls have been disabled in this function. 58if (
F.getFnAttribute(
"disable-tail-calls").getValueAsBool())
61// Conservatively require the attributes of the call to match those of 62// the return. Ignore following attributes because they don't affect the 64AttrBuilder CallerAttrs(
F.getContext(),
F.getAttributes().getRetAttrs());
65for (
constauto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
66 Attribute::DereferenceableOrNull, Attribute::NoAlias,
67 Attribute::NonNull, Attribute::NoUndef,
68 Attribute::Range, Attribute::NoFPClass})
74// It's not safe to eliminate the sign / zero extension of the return value. 75if (CallerAttrs.
contains(Attribute::ZExt) ||
76 CallerAttrs.
contains(Attribute::SExt))
79// Check if the only use is a function return node. 87for (
unsignedI = 0, E = ArgLocs.
size();
I != E; ++
I) {
92// Only look at callee saved registers. 95// Check that we pass the value used for the caller. 96// (We look for a CopyFromReg reading a virtual register that is used 97// for the function live-in value of register Reg) 103Register ArgReg = cast<RegisterSDNode>(
Value->getOperand(1))->getReg();
104if (
MRI.getLiveInPhysReg(ArgReg) != Reg)
110/// Set CallLoweringInfo attribute flags based on a call instruction 111/// and called function attributes. 114IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
115IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
116IsNoExt = Call->paramHasAttr(ArgIdx, Attribute::NoExt);
117IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
118IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
119IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
120IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
121IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
122IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
123IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
124IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
125IsSwiftAsync = Call->paramHasAttr(ArgIdx, Attribute::SwiftAsync);
126IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
127Alignment = Call->getParamStackAlign(ArgIdx);
130"multiple ABI attributes?");
144/// Generate a libcall taking the given operands as arguments and returning a 145/// result of type RetVT. 146std::pair<SDValue, SDValue>
156 Args.reserve(Ops.
size());
159for (
unsigned i = 0; i < Ops.
size(); ++i) {
162 Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.
getContext());
165 Entry.IsZExt = !Entry.IsSExt;
169 Entry.IsSExt = Entry.IsZExt =
false;
171 Args.push_back(Entry);
174if (LC == RTLIB::UNKNOWN_LIBCALL)
182bool zeroExtend = !signExtend;
186 signExtend = zeroExtend =
false;
197return LowerCallTo(CLI);
201 std::vector<EVT> &MemOps,
unsigned Limit,
constMemOp &
Op,
unsigned DstAS,
203if (Limit != ~
unsigned(0) &&
Op.isMemcpyWithFixedDstAlign() &&
204Op.getSrcAlign() <
Op.getDstAlign())
209if (VT == MVT::Other) {
210// Use the largest integer type whose alignment constraints are satisfied. 211// We only need to check DstAlign here as SrcAlign is always greater or 212// equal to DstAlign (or zero). 213 VT = MVT::LAST_INTEGER_VALUETYPE;
214if (
Op.isFixedDstAlign())
220// Find the largest legal integer type. 221MVT LVT = MVT::LAST_INTEGER_VALUETYPE;
226// If the type we've chosen is larger than the largest legal integer type 227// then use that instead. 232unsigned NumMemOps = 0;
236while (VTSize >
Size) {
237// For now, only use non-vector load / store's for the left-over pieces. 247elseif (NewVT == MVT::i64 &&
250// i64 is usually not legal on 32-bit targets, but f64 may be. 265// If the new VT cannot cover all of the remaining bits, then consider 266// issuing a (or a pair of) unaligned and overlapping load / store. 268if (NumMemOps &&
Op.allowOverlap() && NewVTSize <
Size &&
270 VT, DstAS,
Op.isFixedDstAlign() ?
Op.getDstAlign() :
Align(1),
280if (++NumMemOps > Limit)
283 MemOps.push_back(VT);
290/// Soften the operands of a comparison. This code is shared among BR_CC, 291/// SELECT_CC, and SETCC handlers. 298return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
308bool IsSignaling)
const{
309// FIXME: Currently we cannot really respect all IEEE predicates due to libgcc 310// not supporting it. We can update this code when libgcc provides such 313assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
314 &&
"Unsupported setcc type!");
316// Expand into one or more soft-fp libcall(s). 317RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
318bool ShouldInvertCC =
false;
322 LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
323 (VT == MVT::f64) ? RTLIB::OEQ_F64 :
324 (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
328 LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
329 (VT == MVT::f64) ? RTLIB::UNE_F64 :
330 (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
334 LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
335 (VT == MVT::f64) ? RTLIB::OGE_F64 :
336 (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
340 LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
341 (VT == MVT::f64) ? RTLIB::OLT_F64 :
342 (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
346 LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
347 (VT == MVT::f64) ? RTLIB::OLE_F64 :
348 (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
352 LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
353 (VT == MVT::f64) ? RTLIB::OGT_F64 :
354 (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
357 ShouldInvertCC =
true;
360 LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
361 (VT == MVT::f64) ? RTLIB::UO_F64 :
362 (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
366 ShouldInvertCC =
true;
369 LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
370 (VT == MVT::f64) ? RTLIB::UO_F64 :
371 (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
372 LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
373 (VT == MVT::f64) ? RTLIB::OEQ_F64 :
374 (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
377// Invert CC for unordered comparisons 378 ShouldInvertCC =
true;
381 LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
382 (VT == MVT::f64) ? RTLIB::OGE_F64 :
383 (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
386 LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
387 (VT == MVT::f64) ? RTLIB::OGT_F64 :
388 (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
391 LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
392 (VT == MVT::f64) ? RTLIB::OLE_F64 :
393 (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
396 LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
397 (VT == MVT::f64) ? RTLIB::OLT_F64 :
398 (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
404// Use the target specific return value for comparison lib calls. 406SDValue Ops[2] = {NewLHS, NewRHS};
411auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
418 CCCode = getSetCCInverse(CCCode, RetVT);
421if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
428auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
431 CCCode = getSetCCInverse(CCCode, RetVT);
432 NewLHS = DAG.
getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
442/// Return the entry encoding for a jump table in the current function. The 443/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum. 445// In non-pic modes, just use the address of a block. 446if (!isPositionIndependent())
449// In PIC mode, if the target supports a GPRel32 directive, use it. 453// Otherwise, use a label difference. 459// If our PIC model is GP relative, use the global offset table as the base. 460unsigned JTEncoding = getJumpTableEncoding();
469/// This returns the relocation base for the given PIC jumptable, the same as 470/// getPICJumpTableRelocBase, but as an MCExpr. 474// The normal PIC reloc base is the label at the start of the jump table. 482// Jump table debug info is only needed if CodeView is enabled. 494// If the address is not even local to this DSO we will have to load it from 495// a got and then add the offset. 496if (!TM.shouldAssumeDSOLocal(GV))
499// If the code is position independent we will have to add a base register. 500if (isPositionIndependent())
503// Otherwise we can do it. 507//===----------------------------------------------------------------------===// 508// Optimization Methods 509//===----------------------------------------------------------------------===// 511/// If the specified instruction has a constant integer operand and there are 512/// bits set in that constant that are not demanded, then clear those bits and 516constAPInt &DemandedElts,
519unsigned Opcode =
Op.getOpcode();
521// Early-out if we've ended up calling an undemanded node, leave this to 526// Do target-specific constant optimization. 527if (targetShrinkDemandedConstant(
Op,
DemandedBits, DemandedElts, TLO))
530// FIXME: ISD::SELECT, ISD::SELECT_CC 537auto *Op1C = dyn_cast<ConstantSDNode>(
Op.getOperand(1));
538if (!Op1C || Op1C->isOpaque())
541// If this is a 'not' op, don't touch it because that's a canonical form. 542constAPInt &
C = Op1C->getAPIntValue();
547EVT VT =
Op.getValueType();
564EVT VT =
Op.getValueType();
571/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free. 572/// This uses isTruncateFree/isZExtFree and ANY_EXTEND for the widening cast, 573/// but it could be generalized for targets with other types of implicit 579"ShrinkDemandedOp only supports binary operators!");
580assert(
Op.getNode()->getNumValues() == 1 &&
581"ShrinkDemandedOp only supports nodes with one result!");
583EVT VT =
Op.getValueType();
587// Early return, as this function cannot handle vector types. 592Op.getOperand(1).getValueType().getScalarSizeInBits() ==
BitWidth &&
593"ShrinkDemandedOp only supports operands that have the same size!");
595// Don't do this if the node has another user, which may require the 597if (!
Op.getNode()->hasOneUse())
600// Search for the smallest integer type with free casts to and from 601// Op's type. For expedience, just check power-of-2 integer types. 607// We found a type with free casts. 609// If the operation has the 'disjoint' flag, then the 610// operands on the new node are also disjoint. 614Op.getOpcode(), dl, SmallVT,
617assert(DemandedSize <= SmallVTBits &&
"Narrowed below demanded bits?");
632bool Simplified = SimplifyDemandedBits(
Op,
DemandedBits, Known, TLO);
641constAPInt &DemandedElts,
661bool AssumeSingleUse)
const{
662EVT VT =
Op.getValueType();
664// Since the number of lanes in a scalable vector is unknown at compile time, 665// we track one bit which is implicitly broadcast to all lanes. This means 666// that all lanes in a scalable vector are considered demanded. 674// TODO: Under what circumstances can we create nodes? Constant folding? 678EVT VT =
Op.getValueType();
680// Limit search depth. 688// Not demanding any bits/elts from Op. 696switch (
Op.getOpcode()) {
702EVT SrcVT = Src.getValueType();
703EVT DstVT =
Op.getValueType();
709if (NumSrcEltBits == NumDstEltBits)
710if (
SDValue V = SimplifyMultipleUseDemandedBits(
714if (SrcVT.
isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
715unsigned Scale = NumDstEltBits / NumSrcEltBits;
719for (
unsigned i = 0; i != Scale; ++i) {
720unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
721unsigned BitOffset = EltOffset * NumSrcEltBits;
724 DemandedSrcBits |= Sub;
725for (
unsigned j = 0; j != NumElts; ++j)
727 DemandedSrcElts.
setBit((j * Scale) + i);
731if (
SDValue V = SimplifyMultipleUseDemandedBits(
732 Src, DemandedSrcBits, DemandedSrcElts, DAG,
Depth + 1))
736// TODO - bigendian once we have test coverage. 737if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) {
738unsigned Scale = NumSrcEltBits / NumDstEltBits;
742for (
unsigned i = 0; i != NumElts; ++i)
743if (DemandedElts[i]) {
744unsignedOffset = (i % Scale) * NumDstEltBits;
746 DemandedSrcElts.
setBit(i / Scale);
749if (
SDValue V = SimplifyMultipleUseDemandedBits(
750 Src, DemandedSrcBits, DemandedSrcElts, DAG,
Depth + 1))
759/*PoisonOnly=*/false))
767// If all of the demanded bits are known 1 on one side, return the other. 768// These bits cannot contribute to the result of the 'and' in this 771returnOp.getOperand(0);
773returnOp.getOperand(1);
780// If all of the demanded bits are known zero on one side, return the 781// other. These bits cannot contribute to the result of the 'or' in this 784returnOp.getOperand(0);
786returnOp.getOperand(1);
793// If all of the demanded bits are known zero on one side, return the 796returnOp.getOperand(0);
798returnOp.getOperand(1);
804returnOp.getOperand(0);
808returnOp.getOperand(1);
812// If we are only demanding sign bits then we can use the shift source 814if (std::optional<uint64_t> MaxSA =
817unsigned ShAmt = *MaxSA;
818unsigned NumSignBits =
821if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
827// If we are only demanding sign bits then we can use the shift source 829if (std::optional<uint64_t> MaxSA =
832unsigned ShAmt = *MaxSA;
833// Must already be signbits in DemandedBits bounds, and can't demand any 836unsigned NumSignBits =
848// If (1) we only need the sign-bit, (2) the setcc operands are the same 849// width as the setcc result, and (3) the result of a setcc conforms to 0 or 850// -1, we may be able to bypass the setcc. 855// If we're testing X < 0, then this compare isn't needed - just use X! 856// FIXME: We're limiting to integer types here, but this should also work 857// if we don't care about FP signed-zero. The use of SETLT with FP means 858// that we don't care about NaNs. 866// If none of the extended bits are demanded, eliminate the sextinreg. 868EVT ExVT = cast<VTSDNode>(
Op.getOperand(1))->getVT();
873// If the input is already sign extended, just drop the extension. 875if (NumSignBits >= (
BitWidth - ExBits + 1))
885// If we only want the lowest element and none of extended bits, then we can 886// return the bitcasted source vector. 888EVT SrcVT = Src.getValueType();
889EVT DstVT =
Op.getValueType();
890if (IsLE && DemandedElts == 1 &&
901// If we don't demand the inserted element, return the base vector. 903auto *CIdx = dyn_cast<ConstantSDNode>(
Op.getOperand(2));
906 !DemandedElts[CIdx->getZExtValue()])
919// If we don't demand the inserted subvector, return the base vector. 920if (DemandedSubElts == 0)
928// If all the demanded elts are from one operand and are inline, 929// then we can use the operand directly. 930bool AllUndef =
true, IdentityLHS =
true, IdentityRHS =
true;
931for (
unsigned i = 0; i != NumElts; ++i) {
932int M = ShuffleMask[i];
933if (M < 0 || !DemandedElts[i])
936 IdentityLHS &= (M == (int)i);
937 IdentityRHS &= ((M - NumElts) == i);
943returnOp.getOperand(0);
945returnOp.getOperand(1);
949// TODO: Probably okay to remove after audit; here to reduce change size 950// in initial enablement patch for scalable vectors 955if (
SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
966EVT VT =
Op.getValueType();
967// Since the number of lanes in a scalable vector is unknown at compile time, 968// we track one bit which is implicitly broadcast to all lanes. This means 969// that all lanes in a scalable vector are considered demanded. 973return SimplifyMultipleUseDemandedBits(
Op,
DemandedBits, DemandedElts, DAG,
981return SimplifyMultipleUseDemandedBits(
Op,
DemandedBits, DemandedElts, DAG,
985// Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1). 986// or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1). 993"SRL or SRA node is required here!");
994// Is the right shift using an immediate value of 1? 996if (!N1C || !N1C->
isOne())
999// We are looking for an avgfloor 1001// or one of these as a avgceil 1002// add(add(ext, ext), 1) 1003// add(add(ext, 1), ext) 1004// add(ext, add(ext, 1)) 1036// If the shift is signed (sra): 1037// - Needs >= 2 sign bit for both operands. 1038// - Needs >= 2 zero bits. 1039// If the shift is unsigned (srl): 1040// - Needs >= 1 zero bit for both operands. 1041// - Needs 1 demanded bit zero and >= 2 sign bits. 1043unsigned ShiftOpc =
Op.getOpcode();
1044bool IsSigned =
false;
1048unsigned NumSigned = std::min(NumSignedA, NumSignedB) - 1;
1053unsigned NumZero = std::min(NumZeroA, NumZeroB);
1059if (NumZero >= 2 && NumSigned < NumZero) {
1064if (NumSigned >= 1) {
1072if (NumZero >= 1 && NumSigned < NumZero) {
1089// Find the smallest power-2 type that is legal for this vector size and 1090// operation, given the original type size and the number of known sign/zero 1092EVT VT =
Op.getValueType();
1101// If we could not transform, and (both) adds are nuw/nsw, we can use the 1102// larger type size to do the transform. 1106Add.getOperand(1)) &&
1114// Don't create a AVGFLOOR node with a scalar constant unless its legal as 1115// this is likely to stop other folds (reassociation, value tracking etc.) 1117 (isa<ConstantSDNode>(ExtOpA) || isa<ConstantSDNode>(ExtOpB)))
1127/// Look at Op. At this point, we know that only the OriginalDemandedBits of the 1128/// result of Op are ever used downstream. If we can use this information to 1129/// simplify Op, create a new simplified DAG node and return true, returning the 1130/// original and new nodes in Old and New. Otherwise, analyze the expression and 1131/// return a mask of Known bits for the expression (used to simplify the 1132/// caller). The Known bits may only be accurate for those bits in the 1133/// OriginalDemandedBits and OriginalDemandedElts. 1137unsignedDepth,
bool AssumeSingleUse)
const{
1140"Mask size mismatches value type size!");
1142// Don't know anything. 1145EVT VT =
Op.getValueType();
1147unsigned NumElts = OriginalDemandedElts.
getBitWidth();
1149"Unexpected vector size");
1152APInt DemandedElts = OriginalDemandedElts;
1159// We can't simplify target constants. 1164// We know all of the bits for a constant! 1170// We know all of the bits for a floating point constant! 1172 cast<ConstantFPSDNode>(
Op)->getValueAPF().bitcastToAPInt());
1176// Other users may use these bits. 1177bool HasMultiUse =
false;
1178if (!AssumeSingleUse && !
Op.getNode()->hasOneUse()) {
1180// Limit search depth. 1183// Allow multiple uses, just set the DemandedBits/Elts to all bits. 1187 }
elseif (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
1188// Not demanding any bits/elts from Op. 1191// Limit search depth. 1196switch (
Op.getOpcode()) {
1200if (!DemandedElts[0])
1205unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
1207if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO,
Depth + 1))
1210// Upper elements are undef, so only get the knownbits if we just demand 1211// the bottom element. 1212if (DemandedElts == 1)
1217// Collect the known bits that are shared by every demanded element. 1218// TODO: Call SimplifyDemandedBits for non-constant demanded elements. 1220returnfalse;
// Don't fall through, will infinitely loop. 1225if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO,
Depth + 1))
1228// Implicitly truncate the bits to match the official semantics of 1234auto *LD = cast<LoadSDNode>(
Op);
1235if (getTargetConstantFromLoad(LD)) {
1237returnfalse;
// Don't fall through, will infinitely loop. 1240// If this is a ZEXTLoad and we are looking at the loaded value. 1241EVT MemVT = LD->getMemoryVT();
1244returnfalse;
// Don't fall through, will infinitely loop. 1253auto *CIdx = dyn_cast<ConstantSDNode>(
Op.getOperand(2));
1256// If index isn't constant, assume we need all vector elements AND the 1258APInt DemandedVecElts(DemandedElts);
1260unsignedIdx = CIdx->getZExtValue();
1263// Inserted element is not required. 1264if (!DemandedElts[
Idx])
1271if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO,
Depth + 1))
1277if (SimplifyDemandedBits(Vec,
DemandedBits, DemandedVecElts, KnownVec, TLO,
1281if (!!DemandedVecElts)
1289// Demand any elements from the subvector and the remainder from the src its 1296APInt DemandedSrcElts = DemandedElts;
1300if (SimplifyDemandedBits(Sub,
DemandedBits, DemandedSubElts, KnownSub, TLO,
1303if (SimplifyDemandedBits(Src,
DemandedBits, DemandedSrcElts, KnownSrc, TLO,
1309if (!!DemandedSubElts)
1311if (!!DemandedSrcElts)
1314// Attempt to avoid multi-use src if we don't need anything from it. 1317SDValue NewSub = SimplifyMultipleUseDemandedBits(
1319SDValue NewSrc = SimplifyMultipleUseDemandedBits(
1321if (NewSub || NewSrc) {
1322 NewSub = NewSub ? NewSub : Sub;
1323 NewSrc = NewSrc ? NewSrc : Src;
1334// Offset the demanded elts by the subvector index. 1336if (Src.getValueType().isScalableVector())
1339unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
1342if (SimplifyDemandedBits(Src,
DemandedBits, DemandedSrcElts, Known, TLO,
1346// Attempt to avoid multi-use src if we don't need anything from it. 1348SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
1363EVT SubVT =
Op.getOperand(0).getValueType();
1366for (
unsigned i = 0; i != NumSubVecs; ++i) {
1367APInt DemandedSubElts =
1368 DemandedElts.
extractBits(NumSubElts, i * NumSubElts);
1369if (SimplifyDemandedBits(
Op.getOperand(i),
DemandedBits, DemandedSubElts,
1370 Known2, TLO,
Depth + 1))
1372// Known bits are shared by every demanded subvector element. 1373if (!!DemandedSubElts)
1382// Collect demanded elements from shuffle operands.. 1383APInt DemandedLHS, DemandedRHS;
1388if (!!DemandedLHS || !!DemandedRHS) {
1395if (SimplifyDemandedBits(Op0,
DemandedBits, DemandedLHS, Known2, TLO,
1401if (SimplifyDemandedBits(Op1,
DemandedBits, DemandedRHS, Known2, TLO,
1407// Attempt to avoid multi-use ops if we don't need anything from them. 1408SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1410SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1412if (DemandedOp0 || DemandedOp1) {
1413 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1414 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1425// If the RHS is a constant, check to see if the LHS would be zero without 1426// using the bits from the RHS. Below, we use knowledge about the RHS to 1427// simplify the LHS, here we're using information from the LHS to simplify 1430// Do not increment Depth here; that can cause an infinite loop. 1432// If the LHS already has zeros where RHSC does, this 'and' is dead. 1437// If any of the set bits in the RHS are known zero on the LHS, shrink 1443// Bitwise-not (xor X, -1) is a special case: we don't usually shrink its 1444// constant, but if this 'and' is only clearing bits that were just set by 1445// the xor, then this 'and' can be eliminated by shrinking the mask of 1446// the xor. For example, for a 32-bit X: 1447// and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1 1449 LHSKnown.
One == ~RHSC->getAPIntValue()) {
1455// AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I) 1456// iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits). 1461unsigned NumSubElts =
1478if (SimplifyDemandedBits(Op1,
DemandedBits, DemandedElts, Known, TLO,
1482 Known2, TLO,
Depth + 1))
1485// If all of the demanded bits are known one on one side, return the other. 1486// These bits cannot contribute to the result of the 'and'. 1491// If all of the demanded bits in the inputs are known zeros, return zero. 1494// If the RHS is a constant, see if we can simplify it. 1498// If the operation can be done in a smaller type, do so. 1502// Attempt to avoid multi-use ops if we don't need anything from them. 1504SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1506SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1508if (DemandedOp0 || DemandedOp1) {
1509 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1510 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1522if (SimplifyDemandedBits(Op1,
DemandedBits, DemandedElts, Known, TLO,
1529 Known2, TLO,
Depth + 1)) {
1534// If all of the demanded bits are known zero on one side, return the other. 1535// These bits cannot contribute to the result of the 'or'. 1540// If the RHS is a constant, see if we can simplify it. 1543// If the operation can be done in a smaller type, do so. 1547// Attempt to avoid multi-use ops if we don't need anything from them. 1549SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1551SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1553if (DemandedOp0 || DemandedOp1) {
1554 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1555 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1561// (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2)) 1562// TODO: Use SimplifyMultipleUseDemandedBits to peek through masks. 1565// Attempt to match all commutations - m_c_Or would've been useful! 1566for (
intI = 0;
I != 2; ++
I) {
1569SDValue Alt =
Op.getOperand(1 -
I).getOperand(0);
1570SDValue C2 =
Op.getOperand(1 -
I).getOperand(1);
1572for (
int J = 0; J != 2; ++J) {
1595if (SimplifyDemandedBits(Op1,
DemandedBits, DemandedElts, Known, TLO,
1598if (SimplifyDemandedBits(Op0,
DemandedBits, DemandedElts, Known2, TLO,
1602// If all of the demanded bits are known zero on one side, return the other. 1603// These bits cannot contribute to the result of the 'xor'. 1608// If the operation can be done in a smaller type, do so. 1612// If all of the unknown bits are known to be zero on one side or the other 1613// turn this into an *inclusive* or. 1614// e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0 1620// If one side is a constant, and all of the set bits in the constant are 1621// also known set on the other side, turn this into an AND, as we know 1622// the bits will be cleared. 1623// e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2 1624// NB: it is okay if more bits are known than are requested 1625if (
C->getAPIntValue() == Known2.
One) {
1631// If the RHS is a constant, see if we can change it. Don't alter a -1 1632// constant because that's a 'not' op, and that is better for combining 1634if (!
C->isAllOnes() &&
DemandedBits.isSubsetOf(
C->getAPIntValue())) {
1635// We're flipping all demanded bits. Flip the undemanded bits too. 1644// Don't crash on an oversized shift. We can not guarantee that a 1645// bogus shift has been simplified to undef. 1646if (ShiftC->getAPIntValue().ult(
BitWidth)) {
1647uint64_t ShiftAmt = ShiftC->getZExtValue();
1650 : Ones.
lshr(ShiftAmt);
1652 isDesirableToCommuteXorWithShift(
Op.getNode())) {
1653// If the xor constant is a demanded mask, do a 'not' before the 1655// xor (X << ShiftC), XorC --> (not X) << ShiftC 1656// xor (X >> ShiftC), XorC --> (not X) >> ShiftC 1666// If we can't turn this into a 'not', try to shrink the constant. 1667if (!
C || !
C->isAllOnes())
1671// Attempt to avoid multi-use ops if we don't need anything from them. 1673SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1675SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1677if (DemandedOp0 || DemandedOp1) {
1678 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1679 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1689if (SimplifyDemandedBits(
Op.getOperand(2),
DemandedBits, DemandedElts,
1690 Known, TLO,
Depth + 1))
1692if (SimplifyDemandedBits(
Op.getOperand(1),
DemandedBits, DemandedElts,
1693 Known2, TLO,
Depth + 1))
1696// If the operands are constants, see if we can simplify them. 1700// Only known if known in both the LHS and RHS. 1704if (SimplifyDemandedBits(
Op.getOperand(2),
DemandedBits, DemandedElts,
1705 Known, TLO,
Depth + 1))
1707if (SimplifyDemandedBits(
Op.getOperand(1),
DemandedBits, DemandedElts,
1708 Known2, TLO,
Depth + 1))
1711// Only known if known in both the LHS and RHS. 1715if (SimplifyDemandedBits(
Op.getOperand(3),
DemandedBits, DemandedElts,
1716 Known, TLO,
Depth + 1))
1718if (SimplifyDemandedBits(
Op.getOperand(2),
DemandedBits, DemandedElts,
1719 Known2, TLO,
Depth + 1))
1722// If the operands are constants, see if we can simplify them. 1726// Only known if known in both the LHS and RHS. 1733// If (1) we only need the sign-bit, (2) the setcc operands are the same 1734// width as the setcc result, and (3) the result of a setcc conforms to 0 or 1735// -1, we may be able to bypass the setcc. 1740// If we're testing X < 0, then this compare isn't needed - just use X! 1741// FIXME: We're limiting to integer types here, but this should also work 1742// if we don't care about FP signed-zero. The use of SETLT with FP means 1743// that we don't care about NaNs. 1748// TODO: Should we check for other forms of sign-bit comparisons? 1749// Examples: X <= -1, X >= 0 1762if (std::optional<uint64_t> KnownSA =
1764unsigned ShAmt = *KnownSA;
1768// If this is ((X >>u C1) << ShAmt), see if we can simplify this into a 1769// single shift. We can do this if the bottom bits (which are shifted 1770// out) are never demanded. 1771// TODO - support non-uniform vector amounts. 1774if (std::optional<uint64_t> InnerSA =
1776unsigned C1 = *InnerSA;
1778int Diff = ShAmt - C1;
1790// Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits 1791// are not demanded. This will likely allow the anyext to be folded away. 1792// TODO - support non-uniform vector amounts. 1797if (ShAmt < InnerBits &&
DemandedBits.getActiveBits() <= InnerBits &&
1798 isTypeDesirableForOp(
ISD::SHL, InnerVT)) {
1806// Repeat the SHL optimization above in cases where an extension 1807// intervenes: (shl (anyext (shr x, c1)), c2) to 1808// (shl (anyext x), c2-c1). This requires that the bottom c1 bits 1809// aren't demanded (as above) and that the shifted upper c1 bits of 1810// x aren't demanded. 1811// TODO - support non-uniform vector amounts. 1815 InnerOp, DemandedElts,
Depth + 2)) {
1816unsigned InnerShAmt = *SA2;
1817if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
1819 (InnerBits - InnerShAmt + ShAmt) &&
1833if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1835// Disable the nsw and nuw flags. We can no longer guarantee that we 1836// won't wrap after simplification. 1840 Known.
Zero <<= ShAmt;
1841 Known.
One <<= ShAmt;
1842// low bits known zero. 1845// Attempt to avoid multi-use ops if we don't need anything from them. 1847SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1848 Op0, InDemandedMask, DemandedElts, TLO.
DAG,
Depth + 1);
1855// TODO: Can we merge this fold with the one below? 1856// Try shrinking the operation as long as the shift amount will still be 1859Op.getNode()->hasOneUse()) {
1860// Search for the smallest integer type with free casts to and from 1861// Op's type. For expedience, just check power-of-2 integer types. 1867 isTypeDesirableForOp(
ISD::SHL, SmallVT) &&
1870assert(DemandedSize <= SmallVTBits &&
1871"Narrowed below demanded bits?");
1872// We found a type with free casts. 1883// Narrow shift to lower half - similar to ShrinkDemandedOp. 1884// (shl i64:x, K) -> (i64 zero_extend (shl (i32 (trunc i64:x)), K)) 1885// Only do this if we demand the upper half so the knownbits are correct. 1891 isTypeDesirableForOp(
ISD::SHL, HalfVT) &&
1894// If we're demanding the upper bits at all, we must ensure 1895// that the upper bits of the shift result are known to be zero, 1896// which is equivalent to the narrow shift being NUW. 1900 Flags.setNoSignedWrap(IsNSW);
1901 Flags.setNoUnsignedWrap(IsNUW);
1906 NewShiftAmt, Flags);
1914// This is a variable shift, so we can't shift the demand mask by a known 1915// amount. But if we are not demanding high bits, then we are not 1916// demanding those bits from the pre-shifted operand either. 1919if (SimplifyDemandedBits(Op0, DemandedFromOp, DemandedElts, Known, TLO,
1921// Disable the nsw and nuw flags. We can no longer guarantee that we 1922// won't wrap after simplification. 1930// If we are only demanding sign bits then we can use the shift source 1932if (std::optional<uint64_t> MaxSA =
1934unsigned ShAmt = *MaxSA;
1935unsigned NumSignBits =
1938if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
1948if (std::optional<uint64_t> KnownSA =
1950unsigned ShAmt = *KnownSA;
1954// If this is ((X << C1) >>u ShAmt), see if we can simplify this into a 1955// single shift. We can do this if the top bits (which are shifted out) 1956// are never demanded. 1957// TODO - support non-uniform vector amounts. 1960if (std::optional<uint64_t> InnerSA =
1962unsigned C1 = *InnerSA;
1964int Diff = ShAmt - C1;
1976// If this is (srl (sra X, C1), ShAmt), see if we can combine this into a 1977// single sra. We can do this if the top bits are never demanded. 1980if (std::optional<uint64_t> InnerSA =
1982unsigned C1 = *InnerSA;
1983// Clamp the combined shift amount if it exceeds the bit width. 1984unsigned Combined = std::min(C1 + ShAmt,
BitWidth - 1);
1994// If the shift is exact, then it does demand the low bits (and knows that 1996if (
Op->getFlags().hasExact())
1999// Narrow shift to lower half - similar to ShrinkDemandedOp. 2000// (srl i64:x, K) -> (i64 zero_extend (srl (i32 (trunc i64:x)), K)) 2005 isTypeDesirableForOp(
ISD::SRL, HalfVT) &&
2020// Compute the new bits that are at the top now. 2021if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
2026// High bits known zero. 2029// Attempt to avoid multi-use ops if we don't need anything from them. 2031SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2032 Op0, InDemandedMask, DemandedElts, TLO.
DAG,
Depth + 1);
2039// Use generic knownbits computation as it has support for non-uniform 2044// If we are only demanding sign bits then we can use the shift source 2046if (std::optional<uint64_t> MaxSA =
2048unsigned ShAmt = *MaxSA;
2049// Must already be signbits in DemandedBits bounds, and can't demand any 2050// shifted in zeroes. 2052unsigned NumSignBits =
2059// Try to match AVG patterns (after shift simplification). 2061 DemandedElts,
Depth + 1))
2071// If we only want bits that already match the signbit then we don't need 2078// If this is an arithmetic shift right and only the low-bit is set, we can 2079// always convert this into a logical shr, even if the shift amount is 2080// variable. The low bit of the shift cannot be an input sign bit unless 2081// the shift amount is >= the size of the datatype, which is undefined. 2085if (std::optional<uint64_t> KnownSA =
2087unsigned ShAmt = *KnownSA;
2091// fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target 2092// supports sext_inreg. 2094if (std::optional<uint64_t> InnerSA =
2102if (*InnerSA == ShAmt) {
2110// Even if we can't convert to sext_inreg, we might be able to 2111// remove this shift pair if the input is already sign extended. 2112unsigned NumSignBits =
2114if (NumSignBits > ShAmt)
2122// If the shift is exact, then it does demand the low bits (and knows that 2124if (
Op->getFlags().hasExact())
2127// If any of the demanded bits are produced by the sign extension, we also 2128// demand the input sign bit. 2132if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
2138// If the input sign bit is known to be zero, or if none of the top bits 2139// are demanded, turn this into an unsigned shift right. 2143 Flags.setExact(
Op->getFlags().hasExact());
2150// The bit must come from the sign. 2156// New bits are known one. 2159// Attempt to avoid multi-use ops if we don't need anything from them. 2161SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2162 Op0, InDemandedMask, DemandedElts, TLO.
DAG,
Depth + 1);
2170// Try to match AVG patterns (after shift simplification). 2172 DemandedElts,
Depth + 1))
2185unsigned Amt = SA->getAPIntValue().urem(
BitWidth);
2187// For fshl, 0-shift returns the 1st arg. 2188// For fshr, 0-shift returns the 2nd arg. 2190if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1,
DemandedBits, DemandedElts,
2191 Known, TLO,
Depth + 1))
2196// fshl: (Op0 << Amt) | (Op1 >> (BW - Amt)) 2197// fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt) 2200if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2203if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
2213// Attempt to avoid multi-use ops if we don't need anything from them. 2216SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2217 Op0, Demanded0, DemandedElts, TLO.
DAG,
Depth + 1);
2218SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2219 Op1, Demanded1, DemandedElts, TLO.
DAG,
Depth + 1);
2220if (DemandedOp0 || DemandedOp1) {
2221 DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0;
2222 DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1;
2230// For pow-2 bitwidths we only demand the bottom modulo amt bits. 2233if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts,
2234 Known2, TLO,
Depth + 1))
2245// If we're rotating an 0/-1 value, then it stays an 0/-1 value. 2250unsigned Amt = SA->getAPIntValue().urem(
BitWidth);
2253// rotl: (Op0 << Amt) | (Op0 >> (BW - Amt)) 2254// rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt) 2256if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2264// See if we don't demand either half of the rotated bits. 2266DemandedBits.countr_zero() >= (IsROTL ? Amt : RevAmt)) {
2271DemandedBits.countl_zero() >= (IsROTL ? RevAmt : Amt)) {
2277// For pow-2 bitwidths we only demand the bottom modulo amt bits. 2280if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
2290unsigned Opc =
Op.getOpcode();
2294// If we're only demanding signbits, then we can simplify to OR/AND node. 2297unsigned NumSignBits =
2301if (NumSignBits >= NumDemandedUpperBits)
2304// Check if one arg is always less/greater than (or equal) to the other arg. 2342if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2352// If the only bits demanded come from one byte of the bswap result, 2353// just shift the input byte into position to eliminate the bswap. 2357// Round NTZ down to the next byte. If we have 11 trailing zeros, then 2358// we need all the bits down to bit 8. Likewise, round NLZ. If we 2359// have 14 leading zeros, round to 8. 2362// If we need exactly one byte, we can do this transformation. 2364// Replace this with either a left or right shift to get the byte into 2368unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
2376if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2384// If only 1 bit is demanded, replace with PARITY as long as we're before 2386// FIXME: Limit to scalars for now. 2396EVT ExVT = cast<VTSDNode>(
Op.getOperand(1))->getVT();
2399// If we only care about the highest bit, don't bother shifting right. 2401unsigned MinSignedBits =
2403bool AlreadySignExtended = ExVTBits >= MinSignedBits;
2404// However if the input is already sign extended we expect the sign 2405// extension to be dropped altogether later and do not simplify. 2406if (!AlreadySignExtended) {
2407// Compute the correct shift amount type, which must be getShiftAmountTy 2408// for scalar types after legalization. 2416// If none of the extended bits are demanded, eliminate the sextinreg. 2422// Since the sign extended bits are demanded, we know that the sign 2424 InputDemandedBits.
setBit(ExVTBits - 1);
2426if (SimplifyDemandedBits(Op0, InputDemandedBits, DemandedElts, Known, TLO,
2430// If the sign bit of the input is known set or clear, then we know the 2431// top bits of the result. 2433// If the input sign bit is known zero, convert this into a zero extension. 2434if (Known.
Zero[ExVTBits - 1])
2438if (Known.
One[ExVTBits - 1]) {
// Input sign bit known set 2441 }
else {
// Input sign bit unknown 2448EVT HalfVT =
Op.getOperand(0).getValueType();
2456if (SimplifyDemandedBits(
Op.getOperand(0), MaskLo, KnownLo, TLO,
Depth + 1))
2459if (SimplifyDemandedBits(
Op.getOperand(1), MaskHi, KnownHi, TLO,
Depth + 1))
2462 Known = KnownHi.
concat(KnownLo);
2471EVT SrcVT = Src.getValueType();
2476// If none of the top bits are demanded, convert this into an any_extend. 2478// If we only need the non-extended bits of the bottom element 2479// then we can just bitcast to the result. 2480if (IsLE && IsVecInReg && DemandedElts == 1 &&
2491APInt InDemandedElts = DemandedElts.
zext(InElts);
2492if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2500// Attempt to avoid multi-use ops if we don't need anything from them. 2501if (
SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2502 Src, InDemandedBits, InDemandedElts, TLO.
DAG,
Depth + 1))
2512EVT SrcVT = Src.getValueType();
2517APInt InDemandedElts = DemandedElts.
zext(InElts);
2520// Since some of the sign extended bits are demanded, we know that the sign 2522 InDemandedBits.
setBit(InBits - 1);
2524// If none of the top bits are demanded, convert this into an any_extend. 2526// If we only need the non-extended bits of the bottom element 2527// then we can just bitcast to the result. 2528if (IsLE && IsVecInReg && DemandedElts == 1 &&
2532// Don't lose an all signbits 0/-1 splat on targets with 0/-1 booleans. 2543if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2548// If the sign bit is known one, the top bits match. 2551// If the sign bit is known zero, convert this to a zero extend. 2563// Attempt to avoid multi-use ops if we don't need anything from them. 2564if (
SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2565 Src, InDemandedBits, InDemandedElts, TLO.
DAG,
Depth + 1))
2575EVT SrcVT = Src.getValueType();
2580// If we only need the bottom element then we can just bitcast. 2581// TODO: Handle ANY_EXTEND? 2582if (IsLE && IsVecInReg && DemandedElts == 1 &&
2587APInt InDemandedElts = DemandedElts.
zext(InElts);
2588if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2594// Attempt to avoid multi-use ops if we don't need anything from them. 2595if (
SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2596 Src, InDemandedBits, InDemandedElts, TLO.
DAG,
Depth + 1))
2603// Simplify the input, using demanded bit information, and compute the known 2604// zero/one bits live out. 2605unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
2607if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
2609// Disable the nsw and nuw flags. We can no longer guarantee that we 2610// won't wrap after simplification. 2616// Attempt to avoid multi-use ops if we don't need anything from them. 2617if (
SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2618 Src, TruncMask, DemandedElts, TLO.
DAG,
Depth + 1))
2621// If the input is only used by this truncate, see if we can shrink it based 2622// on the known demanded bits. 2623switch (Src.getOpcode()) {
2627// Shrink SRL by a constant if none of the high bits shifted in are 2630// Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is 2634if (Src.getNode()->hasOneUse()) {
2637// If truncate is only free at trunc(srl), do not turn it into 2638// srl(trunc). The check is done by first check the truncate is free 2639// at Src's opcode(srl), then check the truncate is not done by 2640// referencing sub-register. In test, if both trunc(srl) and 2641// srl(trunc)'s trunc are free, srl(trunc) performs better. If only 2642// trunc(srl)'s trunc is free, trunc(srl) is better. 2646 std::optional<uint64_t> ShAmtC =
2657// None of the shifted in bits are needed. Add a truncate of the 2658// shift input, then shift it. 2672// AssertZext demands all of the high bits, plus any of the low bits 2673// demanded by its users. 2674EVT ZVT = cast<VTSDNode>(
Op.getOperand(1))->getVT();
2676if (SimplifyDemandedBits(
Op.getOperand(0), ~InMask |
DemandedBits, Known,
2680 Known.
Zero |= ~InMask;
2681 Known.
One &= (~Known.Zero);
2687ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
2688unsigned EltBitWidth = Src.getScalarValueSizeInBits();
2693// Demand the bits from every vector element without a constant index. 2696if (
auto *CIdx = dyn_cast<ConstantSDNode>(
Idx))
2697if (CIdx->getAPIntValue().ult(NumSrcElts))
2700// If BitWidth > EltBitWidth the value is anyext:ed. So we do not know 2701// anything about the extended bits. 2704 DemandedSrcBits = DemandedSrcBits.
trunc(EltBitWidth);
2706if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
2710// Attempt to avoid multi-use ops if we don't need anything from them. 2712if (
SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2713 Src, DemandedSrcBits, DemandedSrcElts, TLO.
DAG,
Depth + 1)) {
2729EVT SrcVT = Src.getValueType();
2732// If this is an FP->Int bitcast and if the sign bit is the only 2733// thing demanded, turn this into a FGETSIGN. 2739if ((OpVTLegal || i32Legal) && VT.
isSimple() && SrcVT != MVT::f16 &&
2740 SrcVT != MVT::f128) {
2741// Cannot eliminate/lower SHL for f128 yet. 2742EVT Ty = OpVTLegal ? VT : MVT::i32;
2743// Make a FGETSIGN + SHL to move the sign bit into the appropriate 2744// place. We expect the SHL to be eliminated by other optimizations. 2746unsigned OpVTSizeInBits =
Op.getValueSizeInBits();
2747if (!OpVTLegal && OpVTSizeInBits > 32)
2749unsigned ShVal =
Op.getValueSizeInBits() - 1;
2756// Bitcast from a vector using SimplifyDemanded Bits/VectorElts. 2757// Demand the elt/bit if any of the original elts/bits are demanded. 2759unsigned Scale =
BitWidth / NumSrcEltBits;
2763for (
unsigned i = 0; i != Scale; ++i) {
2764unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
2765unsigned BitOffset = EltOffset * NumSrcEltBits;
2768 DemandedSrcBits |= Sub;
2769for (
unsigned j = 0; j != NumElts; ++j)
2771 DemandedSrcElts.
setBit((j * Scale) + i);
2775APInt KnownSrcUndef, KnownSrcZero;
2776if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2777 KnownSrcZero, TLO,
Depth + 1))
2781if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2782 KnownSrcBits, TLO,
Depth + 1))
2784 }
elseif (IsLE && (NumSrcEltBits %
BitWidth) == 0) {
2785// TODO - bigendian once we have test coverage. 2786unsigned Scale = NumSrcEltBits /
BitWidth;
2790for (
unsigned i = 0; i != NumElts; ++i)
2791if (DemandedElts[i]) {
2794 DemandedSrcElts.
setBit(i / Scale);
2798APInt KnownSrcUndef, KnownSrcZero;
2799if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2800 KnownSrcZero, TLO,
Depth + 1))
2805if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2806 KnownSrcBits, TLO,
Depth + 1))
2809// Attempt to avoid multi-use ops if we don't need anything from them. 2811if (
SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2812 Src, DemandedSrcBits, DemandedSrcElts, TLO.
DAG,
Depth + 1)) {
2819// If this is a bitcast, let computeKnownBits handle it. Only do this on a 2820// recursive call where Known may be useful to the caller. 2829// The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1. 2830// If we demand exactly one bit N and we have "X * (C' << N)" where C' is 2831// odd (has LSB set), then the left-shifted low bit of X is the answer. 2834if (
C &&
C->getAPIntValue().countr_zero() == CTZ) {
2840// For a squared value "X * X", the bottom 2 bits are 0 and X[0] because: 2841// X * X is odd iff X is odd. 2842// 'Quadratic Reciprocity': X * X -> 0 for bit[1] 2851// Add, Sub, and Mul don't demand any bits in positions beyond that 2852// of the highest bit demanded of them. 2853SDValue Op0 =
Op.getOperand(0), Op1 =
Op.getOperand(1);
2858auto GetDemandedBitsLHSMask = [&](
APInt Demanded,
2864if (SimplifyDemandedBits(Op1, LoMask, DemandedElts, KnownOp1, TLO,
2866 SimplifyDemandedBits(Op0, GetDemandedBitsLHSMask(LoMask, KnownOp1),
2867 DemandedElts, KnownOp0, TLO,
Depth + 1) ||
2868// See if the operation should be performed at a smaller bit width. 2870// Disable the nsw and nuw flags. We can no longer guarantee that we 2871// won't wrap after simplification. 2876// neg x with only low bit demanded is simply x. 2881// Attempt to avoid multi-use ops if we don't need anything from them. 2883SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2884 Op0, LoMask, DemandedElts, TLO.
DAG,
Depth + 1);
2885SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2886 Op1, LoMask, DemandedElts, TLO.
DAG,
Depth + 1);
2887if (DemandedOp0 || DemandedOp1) {
2888 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
2889 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
2896// If we have a constant operand, we may be able to turn it into -1 if we 2897// do not demand the high bits. This can make the constant smaller to 2898// encode, allow more general folding, or match specialized instruction 2899// patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that 2900// is probably not useful (and could be detrimental). 2903if (
C && !
C->isAllOnes() && !
C->isOne() &&
2904 (
C->getAPIntValue() | HighMask).isAllOnes()) {
2906// Disable the nsw and nuw flags. We can no longer guarantee that we 2907// won't wrap after simplification. 2913// Match a multiply with a disguised negated-power-of-2 and convert to a 2914// an equivalent shift-left amount. 2915// Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC)) 2916auto getShiftLeftAmt = [&HighMask](
SDValueMul) ->
unsigned {
2920// Don't touch opaque constants. Also, ignore zero and power-of-2 2921// multiplies. Those will get folded later. 2942// (X * MulC) + Op1 --> Op1 - (X << log2(-MulC)) 2943if (
unsigned ShAmt = getShiftLeftAmt(Op0))
2945// Op0 + (X * MulC) --> Op0 - (X << log2(-MulC)) 2946if (
unsigned ShAmt = getShiftLeftAmt(Op1))
2947return foldMul(
ISD::SUB, Op1.getOperand(0), Op0, ShAmt);
2950// Op0 - (X * MulC) --> Op0 + (X << log2(-MulC)) 2951if (
unsigned ShAmt = getShiftLeftAmt(Op1))
2952return foldMul(
ISD::ADD, Op1.getOperand(0), Op0, ShAmt);
2958 }
else {
// Op.getOpcode() is either ISD::ADD or ISD::SUB. 2960Op.getOpcode() ==
ISD::ADD, Flags.hasNoSignedWrap(),
2961 Flags.hasNoUnsignedWrap(), KnownOp0, KnownOp1);
2966// We also ask the target about intrinsics (which could be specific to it). 2969// TODO: Probably okay to remove after audit; here to reduce change size 2970// in initial enablement patch for scalable vectors 2971if (
Op.getValueType().isScalableVector())
2973if (SimplifyDemandedBitsForTargetNode(
Op,
DemandedBits, DemandedElts,
2979// Just use computeKnownBits to compute output bits. 2984// If we know the value of all of the demanded bits, return this as a 2986if (!isTargetCanonicalConstantNode(
Op) &&
2988// Avoid folding to a constant if any OpaqueConstant is involved. 2990 auto *C = dyn_cast<ConstantSDNode>(V);
2991 return C && C->isOpaque();
3002// A multi use 'all demanded elts' simplify failed to find any knownbits. 3003// Try again just for the original demanded elts. 3004// Ensure we do this AFTER constant folding above. 3012constAPInt &DemandedElts,
3018APInt KnownUndef, KnownZero;
3020 SimplifyDemandedVectorElts(
Op, DemandedElts, KnownUndef, KnownZero, TLO);
3029/// Given a vector binary operation and known undefined elements for each input 3030/// operand, compute whether each element of the output is undefined. 3032constAPInt &UndefOp0,
3033constAPInt &UndefOp1) {
3036"Vector binop only");
3041 UndefOp1.
getBitWidth() == NumElts &&
"Bad type for undef analysis");
3043auto getUndefOrConstantElt = [&](
SDValue V,
unsigned Index,
3044constAPInt &UndefVals) {
3045if (UndefVals[Index])
3048if (
auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
3049// Try hard to make sure that the getNode() call is not creating temporary 3050// nodes. Ignore opaque integers because they do not constant fold. 3052auto *
C = dyn_cast<ConstantSDNode>(Elt);
3053if (isa<ConstantFPSDNode>(Elt) || Elt.
isUndef() || (
C && !
C->isOpaque()))
3061for (
unsigned i = 0; i != NumElts; ++i) {
3062// If both inputs for this element are either constant or undef and match 3063// the element type, compute the constant/undef result for this element of 3065// TODO: Ideally we would use FoldConstantArithmetic() here, but that does 3066// not handle FP constants. The code within getNode() should be refactored 3067// to avoid the danger of creating a bogus temporary node here. 3080bool AssumeSingleUse)
const{
3081EVT VT =
Op.getValueType();
3082unsigned Opcode =
Op.getOpcode();
3083APInt DemandedElts = OriginalDemandedElts;
3089if (!shouldSimplifyDemandedVectorElts(
Op, TLO))
3092// TODO: For now we assume we know nothing about scalable vectors. 3097"Mask size mismatches value type element count!");
3105// If Op has other users, assume that all elements are needed. 3106if (!AssumeSingleUse && !
Op.getNode()->hasOneUse())
3109// Not demanding any elements from Op. 3110if (DemandedElts == 0) {
3115// Limit search depth. 3123// Helper for demanding the specified elements and all the bits of both binary 3125auto SimplifyDemandedVectorEltsBinOp = [&](
SDValue Op0,
SDValue Op1) {
3126SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op0, DemandedElts,
3128SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts,
3130if (NewOp0 || NewOp1) {
3133 NewOp1 ? NewOp1 : Op1,
Op->getFlags());
3141if (!DemandedElts[0]) {
3149EVT SrcVT = Src.getValueType();
3161if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3171EVT SrcVT = Src.getValueType();
3173// We only handle vectors here. 3174// TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits? 3178// Fast handling of 'identity' bitcasts. 3180if (NumSrcElts == NumElts)
3181return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
3182 KnownZero, TLO,
Depth + 1);
3184APInt SrcDemandedElts, SrcZero, SrcUndef;
3186// Bitcast from 'large element' src vector to 'small element' vector, we 3187// must demand a source element if any DemandedElt maps to it. 3188if ((NumElts % NumSrcElts) == 0) {
3189unsigned Scale = NumElts / NumSrcElts;
3191if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3195// Try calling SimplifyDemandedBits, converting demanded elts to the bits 3196// of the large element. 3197// TODO - bigendian once we have test coverage. 3201for (
unsigned i = 0; i != NumElts; ++i)
3202if (DemandedElts[i]) {
3203unsigned Ofs = (i % Scale) * EltSizeInBits;
3204 SrcDemandedBits.
setBits(Ofs, Ofs + EltSizeInBits);
3208if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known,
3212// The bitcast has split each wide element into a number of 3213// narrow subelements. We have just computed the Known bits 3214// for wide elements. See if element splitting results in 3215// some subelements being zero. Only for demanded elements! 3216for (
unsigned SubElt = 0; SubElt != Scale; ++SubElt) {
3220for (
unsigned SrcElt = 0; SrcElt != NumSrcElts; ++SrcElt) {
3221unsigned Elt = Scale * SrcElt + SubElt;
3222if (DemandedElts[Elt])
3228// If the src element is zero/undef then all the output elements will be - 3229// only demanded elements are guaranteed to be correct. 3230for (
unsigned i = 0; i != NumSrcElts; ++i) {
3231if (SrcDemandedElts[i]) {
3233 KnownZero.
setBits(i * Scale, (i + 1) * Scale);
3235 KnownUndef.
setBits(i * Scale, (i + 1) * Scale);
3240// Bitcast from 'small element' src vector to 'large element' vector, we 3241// demand all smaller source elements covered by the larger demanded element 3243if ((NumSrcElts % NumElts) == 0) {
3244unsigned Scale = NumSrcElts / NumElts;
3246if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3250// If all the src elements covering an output element are zero/undef, then 3251// the output element will be as well, assuming it was demanded. 3252for (
unsigned i = 0; i != NumElts; ++i) {
3253if (DemandedElts[i]) {
3266/*PoisonOnly=*/false))
3269// TODO: Replace this with the general fold from DAGCombiner::visitFREEZE 3270// freeze(op(x, ...)) -> op(freeze(x), ...). 3278// Check all elements and simplify any unused elements with UNDEF. 3280// Don't simplify BROADCASTS. 3282 [&](
SDValue Elt) { return Op.getOperand(0) != Elt; })) {
3285for (
unsigned i = 0; i != NumElts; ++i) {
3286if (!DemandedElts[i] && !Ops[i].
isUndef()) {
3296for (
unsigned i = 0; i != NumElts; ++i) {
3298if (
SrcOp.isUndef()) {
3300 }
elseif (EltSizeInBits ==
SrcOp.getScalarValueSizeInBits() &&
3308EVT SubVT =
Op.getOperand(0).getValueType();
3311for (
unsigned i = 0; i != NumSubVecs; ++i) {
3314APInt SubUndef, SubZero;
3315if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
3318 KnownUndef.
insertBits(SubUndef, i * NumSubElts);
3319 KnownZero.
insertBits(SubZero, i * NumSubElts);
3322// Attempt to avoid multi-use ops if we don't need anything from them. 3324bool FoundNewSub =
false;
3326for (
unsigned i = 0; i != NumSubVecs; ++i) {
3329SDValue NewSubOp = SimplifyMultipleUseDemandedVectorElts(
3330 SubOp, SubElts, TLO.
DAG,
Depth + 1);
3331 DemandedSubOps.
push_back(NewSubOp ? NewSubOp : SubOp);
3332 FoundNewSub = NewSubOp ?
true : FoundNewSub;
3343// Demand any elements from the subvector and the remainder from the src its 3350APInt DemandedSrcElts = DemandedElts;
3353APInt SubUndef, SubZero;
3354if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
3358// If none of the src operand elements are demanded, replace it with undef. 3359if (!DemandedSrcElts && !Src.isUndef())
3364if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownUndef, KnownZero,
3370// Attempt to avoid multi-use ops if we don't need anything from them. 3372SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3373 Src, DemandedSrcElts, TLO.
DAG,
Depth + 1);
3374SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
3375 Sub, DemandedSubElts, TLO.
DAG,
Depth + 1);
3376if (NewSrc || NewSub) {
3377 NewSrc = NewSrc ? NewSrc : Src;
3378 NewSub = NewSub ? NewSub : Sub;
3380 NewSub,
Op.getOperand(2));
3387// Offset the demanded elts by the subvector index. 3389if (Src.getValueType().isScalableVector())
3392unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3395APInt SrcUndef, SrcZero;
3396if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3402// Attempt to avoid multi-use ops if we don't need anything from them. 3404SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3405 Src, DemandedSrcElts, TLO.
DAG,
Depth + 1);
3417auto *CIdx = dyn_cast<ConstantSDNode>(
Op.getOperand(2));
3419// For a legal, constant insertion index, if we don't need this insertion 3420// then strip it, else remove it from the demanded elts. 3421if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
3422unsignedIdx = CIdx->getZExtValue();
3423if (!DemandedElts[
Idx])
3426APInt DemandedVecElts(DemandedElts);
3428if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
3429 KnownZero, TLO,
Depth + 1))
3438APInt VecUndef, VecZero;
3439if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
3442// Without knowing the insertion index we can't set KnownUndef/KnownZero. 3450// Try to transform the select condition based on the current demanded 3452APInt UndefSel, ZeroSel;
3453if (SimplifyDemandedVectorElts(Sel, DemandedElts, UndefSel, ZeroSel, TLO,
3457// See if we can simplify either vselect operand. 3458APInt DemandedLHS(DemandedElts);
3459APInt DemandedRHS(DemandedElts);
3460APInt UndefLHS, ZeroLHS;
3461APInt UndefRHS, ZeroRHS;
3462if (SimplifyDemandedVectorElts(
LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3465if (SimplifyDemandedVectorElts(
RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3469 KnownUndef = UndefLHS & UndefRHS;
3470 KnownZero = ZeroLHS & ZeroRHS;
3472// If we know that the selected element is always zero, we don't need the 3473// select value element. 3474APInt DemandedSel = DemandedElts & ~KnownZero;
3475if (DemandedSel != DemandedElts)
3476if (SimplifyDemandedVectorElts(Sel, DemandedSel, UndefSel, ZeroSel, TLO,
3487// Collect demanded elements from shuffle operands.. 3488APInt DemandedLHS(NumElts, 0);
3489APInt DemandedRHS(NumElts, 0);
3490for (
unsigned i = 0; i != NumElts; ++i) {
3491int M = ShuffleMask[i];
3492if (M < 0 || !DemandedElts[i])
3494assert(0 <= M && M < (
int)(2 * NumElts) &&
"Shuffle index out of range");
3495if (M < (
int)NumElts)
3498 DemandedRHS.
setBit(M - NumElts);
3501// See if we can simplify either shuffle operand. 3502APInt UndefLHS, ZeroLHS;
3503APInt UndefRHS, ZeroRHS;
3504if (SimplifyDemandedVectorElts(
LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3507if (SimplifyDemandedVectorElts(
RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3511// Simplify mask using undef elements from LHS/RHS. 3513bool IdentityLHS =
true, IdentityRHS =
true;
3515for (
unsigned i = 0; i != NumElts; ++i) {
3519if (!DemandedElts[i] || (M < (
int)NumElts && UndefLHS[M]) ||
3520 (M >= (
int)NumElts && UndefRHS[M - NumElts])) {
3524 IdentityLHS &= (M < 0) || (M == (
int)i);
3525 IdentityRHS &= (M < 0) || ((M - NumElts) == i);
3528// Update legal shuffle masks based on demanded elements if it won't reduce 3529// to Identity which can cause premature removal of the shuffle mask. 3530if (Updated && !IdentityLHS && !IdentityRHS && !TLO.
LegalOps) {
3532 buildLegalVectorShuffle(VT,
DL,
LHS,
RHS, NewMask, TLO.
DAG);
3537// Propagate undef/zero elements from LHS/RHS. 3538for (
unsigned i = 0; i != NumElts; ++i) {
3539int M = ShuffleMask[i];
3542 }
elseif (M < (
int)NumElts) {
3548if (UndefRHS[M - NumElts])
3550if (ZeroRHS[M - NumElts])
3559APInt SrcUndef, SrcZero;
3561unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3562APInt DemandedSrcElts = DemandedElts.
zext(NumSrcElts);
3563if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3570Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
3571 DemandedSrcElts == 1) {
3572// aext - if we just need the bottom element then we can bitcast. 3577// zext(undef) upper bits are guaranteed to be zero. 3582// zext - if we just need the bottom element then we can mask: 3583// zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and. 3584if (IsLE && DemandedSrcElts == 1 && Src.getOpcode() ==
ISD::AND &&
3585Op->isOnlyUserOf(Src.getNode()) &&
3586Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
3588EVT SrcVT = Src.getValueType();
3595ISD::AND,
DL, SrcVT, {Src.getOperand(1), Mask})) {
3604// TODO: There are more binop opcodes that could be handled here - MIN, 3605// MAX, saturated math, etc. 3609if (Op0 == Op1 &&
Op->isOnlyUserOf(Op0.
getNode())) {
3610APInt UndefLHS, ZeroLHS;
3611if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3612Depth + 1,
/*AssumeSingleUse*/true))
3632APInt UndefRHS, ZeroRHS;
3633if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3636APInt UndefLHS, ZeroLHS;
3637if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3641 KnownZero = ZeroLHS & ZeroRHS;
3644// Attempt to avoid multi-use ops if we don't need anything from them. 3645// TODO - use KnownUndef to relax the demandedelts? 3647if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3659APInt UndefRHS, ZeroRHS;
3660if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3663APInt UndefLHS, ZeroLHS;
3664if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3668 KnownZero = ZeroLHS;
3669 KnownUndef = UndefLHS & UndefRHS;
// TODO: use getKnownUndefForVectorBinop? 3671// Attempt to avoid multi-use ops if we don't need anything from them. 3672// TODO - use KnownUndef to relax the demandedelts? 3674if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3685APInt SrcUndef, SrcZero;
3686if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO,
3689// If we know that a demanded element was zero in Op1 we don't need to 3690// demand it in Op0 - its guaranteed to be zero. 3691APInt DemandedElts0 = DemandedElts & ~SrcZero;
3692if (SimplifyDemandedVectorElts(Op0, DemandedElts0, KnownUndef, KnownZero,
3696 KnownUndef &= DemandedElts0;
3697 KnownZero &= DemandedElts0;
3699// If every element pair has a zero/undef then just fold to zero. 3700// fold (and x, undef) -> 0 / (and x, 0) -> 0 3701// fold (mul x, undef) -> 0 / (mul x, 0) -> 0 3702if (DemandedElts.
isSubsetOf(SrcZero | KnownZero | SrcUndef | KnownUndef))
3705// If either side has a zero element, then the result element is zero, even 3706// if the other is an UNDEF. 3707// TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros 3708// and then handle 'and' nodes with the rest of the binop opcodes. 3709 KnownZero |= SrcZero;
3710 KnownUndef &= SrcUndef;
3711 KnownUndef &= ~KnownZero;
3713// Attempt to avoid multi-use ops if we don't need anything from them. 3715if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3722if (SimplifyDemandedVectorElts(
Op.getOperand(0), DemandedElts, KnownUndef,
3723 KnownZero, TLO,
Depth + 1))
3727if (
SDValue NewOp = SimplifyMultipleUseDemandedVectorElts(
3728Op.getOperand(0), DemandedElts, TLO.
DAG,
Depth + 1))
3732// zext(undef) upper bits are guaranteed to be zero. 3742if (SimplifyDemandedVectorElts(
Op.getOperand(0), DemandedElts, KnownUndef,
3743 KnownZero, TLO,
Depth + 1))
3745// Don't fall through to generic undef -> undef handling. 3749if (SimplifyDemandedVectorEltsForTargetNode(
Op, DemandedElts, KnownUndef,
3750 KnownZero, TLO,
Depth))
3755if (SimplifyDemandedBits(
Op,
DemandedBits, OriginalDemandedElts, Known,
3756 TLO,
Depth, AssumeSingleUse))
3762assert((KnownUndef & KnownZero) == 0 &&
"Elements flagged as undef AND zero");
3764// Constant fold all undef cases. 3765// TODO: Handle zero cases as well. 3772/// Determine which of the bits specified in Mask are known to be either zero or 3773/// one and return them in the Known. 3776constAPInt &DemandedElts,
3778unsignedDepth)
const{
3783"Should use MaskedValueIsZero if you don't know whether Op" 3784" is a target node!");
3791unsignedDepth)
const{
3797// The low bits are known zero if the pointer is aligned. 3803unsignedDepth)
const{
3807/// This method can be implemented by targets that want to expose additional 3808/// information about sign bits to the DAG Combiner. 3812unsignedDepth)
const{
3817"Should use ComputeNumSignBits if you don't know whether Op" 3818" is a target node!");
3835"Should use SimplifyDemandedVectorElts if you don't know whether Op" 3836" is a target node!");
3847"Should use SimplifyDemandedBits if you don't know whether Op" 3848" is a target node!");
3849 computeKnownBitsForTargetNode(
Op, Known, DemandedElts, TLO.
DAG,
Depth);
3861"Should use SimplifyMultipleUseDemandedBits if you don't know whether Op" 3862" is a target node!");
3895"Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op" 3896" is a target node!");
3898// If Op can't create undef/poison and none of its operands are undef/poison 3899// then Op is never undef/poison. 3900return !canCreateUndefOrPoisonForTargetNode(
Op, DemandedElts, DAG,
PoisonOnly,
3901/*ConsiderFlags*/true,
Depth) &&
3903 return DAG.isGuaranteedNotToBeUndefOrPoison(V, PoisonOnly,
3915"Should use canCreateUndefOrPoison if you don't know whether Op" 3916" is a target node!");
3917// Be conservative and return true. 3924unsignedDepth)
const{
3929"Should use isKnownNeverNaN if you don't know whether Op" 3930" is a target node!");
3935constAPInt &DemandedElts,
3938unsignedDepth)
const{
3943"Should use isSplatValue if you don't know whether Op" 3944" is a target node!");
3948// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must 3949// work with truncating build vectors and vectors with elements of less than 3958/*AllowTruncation=*/true)) {
3959 CVal = CN->getAPIntValue();
3960 EltWidth =
N.getValueType().getScalarSizeInBits();
3964// If this is a truncating splat, truncate the splat value. 3965// Otherwise, we may fail to match the expected values below. 3967 CVal = CVal.
trunc(EltWidth);
3991// Only interested in constant splats, we don't care about undef 3992// elements in identifying boolean constants and getConstantSplatNode 3993// returns NULL if all ops are undef; 4013// An extended value of 1 is always true, unless its original type is i1, 4014// in which case it will be sign extended to -1. 4015return (
N->isOne() && !SExt) || (SExt && (
N->getValueType(0) != MVT::i1));
4018returnN->isAllOnes() && SExt;
4023/// This helper function of SimplifySetCC tries to optimize the comparison when 4024/// either operand of the SetCC node is a bitwise-and instruction. 4027 DAGCombinerInfo &DCI)
const{
4037// (X & Y) != 0 --> zextOrTrunc(X & Y) 4038// iff everything but LSB is known zero: 4048// Try to eliminate a power-of-2 mask constant by converting to a signbit 4049// test in a narrow type that we can truncate to with no cost. Examples: 4050// (i32 X & 32768) == 0 --> (trunc X to i16) >= 0 4051// (i32 X & 32768) != 0 --> (trunc X to i16) < 0 4052// TODO: This conservatively checks for type legality on the source and 4053// destination types. That may inhibit optimizations, but it also 4054// allows setcc->shift transforms that may be more beneficial. 4055auto *AndC = dyn_cast<ConstantSDNode>(N0.
getOperand(1));
4056if (AndC &&
isNullConstant(N1) && AndC->getAPIntValue().isPowerOf2() &&
4059 AndC->getAPIntValue().getActiveBits());
4068// Match these patterns in any of their permutations: 4082// TODO: We should invert (X & Y) eq/ne 0 -> (X & Y) ne/eq Y if 4083// `isXAndYEqZeroPreferableToXAndYEqY` is false. This is a bit difficult as 4084// its liable to create and infinite loop. 4086if (isXAndYEqZeroPreferableToXAndYEqY(
Cond, OpVT) &&
4088// Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set. 4089// Note that where Y is variable and is known to have at most one bit set 4090// (for example, if it is Z & 1) we cannot do this; the expressions are not 4091// equivalent when Y == 0. 4094if (DCI.isBeforeLegalizeOps() ||
4098// If the target supports an 'and-not' or 'and-complement' logic operation, 4099// try to use that to make a comparison operation more efficient. 4100// But don't do this transform if the mask is a single bit because there are 4101// more efficient ways to deal with that case (for example, 'bt' on x86 or 4104// Bail out if the compare operand that we want to turn into a zero is 4105// already a zero (otherwise, infinite loop). 4109// Transform this into: ~X & Y == 0. 4118/// There are multiple IR patterns that could be checking whether certain 4119/// truncation of a signed number would be lossy or not. The pattern which is 4120/// best at IR level, may not lower optimally. Thus, we want to unfold it. 4121/// We are looking for the following pattern: (KeptBits is a constant) 4122/// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits) 4123/// KeptBits won't be bitwidth(x), that will be constant-folded to true/false. 4124/// KeptBits also can't be 1, that would have been folded to %x dstcond 0 4125/// We will unfold it into the natural trunc+sext pattern: 4126/// ((%x << C) a>> C) dstcond %x 4127/// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x) 4128SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
4131// We must be comparing with a constant. 4133if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
4136// N0 should be: add %x, (1 << (KeptBits-1)) 4140// And we must be 'add'ing a constant. 4142if (!(C01 = dyn_cast<ConstantSDNode>(N0->
getOperand(1))))
4146EVT XVT =
X.getValueType();
4148// Validate constants ... 4157// But need to 'canonicalize' the constant. 4161// But need to 'canonicalize' the constant. 4170auto checkConstants = [&
I1, &I01]() ->
bool {
4171// Both of them must be power-of-two, and the constant from setcc is bigger. 4175if (checkConstants()) {
4176// Great, e.g. got icmp ult i16 (add i16 %x, 128), 256 4178// What if we invert constants? (and the target predicate) 4183if (!checkConstants())
4185// Great, e.g. got icmp uge i16 (add i16 %x, -128), -256 4188// They are power-of-two, so which bit is set? 4189constunsigned KeptBits =
I1.logBase2();
4190constunsigned KeptBitsMinusOne = I01.
logBase2();
4193if (KeptBits != (KeptBitsMinusOne + 1))
4197// We don't want to do this in every single case. 4202// Unfold into: sext_inreg(%x) cond %x 4203// Where 'cond' will be either 'eq' or 'ne'. 4207return DAG.
getSetCC(
DL, SCCVT, SExtInReg,
X, NewCond);
4210// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0 4211SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
4213 DAGCombinerInfo &DCI,
constSDLoc &
DL)
const{
4215"Should be a comparison with 0.");
4217"Valid only for [in]equality comparisons.");
4219unsigned NewShiftOpcode;
4224// Look for '(C l>>/<< Y)'. 4226// The shift should be one-use. 4229unsigned OldShiftOpcode =
V.getOpcode();
4230switch (OldShiftOpcode) {
4238returnfalse;
// must be a logical shift. 4240// We should be shifting a constant. 4241// FIXME: best to use isConstantOrConstantVector(). 4252X, XC,
CC,
Y, OldShiftOpcode, NewShiftOpcode, DAG);
4255// LHS of comparison should be an one-use 'and'. 4262// 'and' is commutative! 4269EVT VT =
X.getValueType();
4272// ((X 'OppositeShiftOpcode' Y) & C) Cond 0 4279/// Try to fold an equality comparison with a {add/sub/xor} binary operation as 4280/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to 4281/// handle the commuted versions of these patterns. 4284 DAGCombinerInfo &DCI)
const{
4290// (X + Y) == X --> Y == 0 4291// (X - Y) == X --> Y == 0 4292// (X ^ Y) == X --> Y == 0 4303// (X + Y) == Y --> X == 0 4304// (X ^ Y) == Y --> X == 0 4308// The shift would not be valid if the operands are boolean (i1). 4312// (X - Y) == Y --> X == Y << 1 4315if (!DCI.isCalledByLegalizer())
4316 DCI.AddToWorklist(YShl1.
getNode());
4324// Look through truncs that don't change the value of a ctpop. 4325// FIXME: Add vector support? Need to be careful with setcc result type below. 4331if (CTPOP.getOpcode() !=
ISD::CTPOP || !CTPOP.hasOneUse())
4334EVT CTVT = CTPOP.getValueType();
4335SDValue CTOp = CTPOP.getOperand(0);
4337// Expand a power-of-2-or-zero comparison based on ctpop: 4338// (ctpop x) u< 2 -> (x & x-1) == 0 4339// (ctpop x) u> 1 -> (x & x-1) != 0 4341// Keep the CTPOP if it is a cheap vector op. 4349returnSDValue();
// This is handled elsewhere. 4355for (
unsigned i = 0; i <
Passes; i++) {
4363// Expand a power-of-2 comparison based on ctpop 4365// Keep the CTPOP if it is cheap. 4374// Its not uncommon for known-never-zero X to exist in (ctpop X) eq/ne 1, so 4375// check before emitting a potentially unnecessary op. 4377// (ctpop x) == 1 --> (x & x-1) == 0 4378// (ctpop x) != 1 --> (x & x-1) != 0 4384// (ctpop x) == 1 --> (x ^ x-1) > x-1 4385// (ctpop x) != 1 --> (x ^ x-1) <= x-1 4404auto getRotateSource = [](
SDValueX) {
4406returnX.getOperand(0);
4410// Peek through a rotated value compared against 0 or -1: 4411// (rot X, Y) == 0/-1 --> X == 0/-1 4412// (rot X, Y) != 0/-1 --> X != 0/-1 4413if (
SDValue R = getRotateSource(N0))
4416// Peek through an 'or' of a rotated value compared against 0: 4417// or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0 4418// or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0 4420// TODO: Add the 'and' with -1 sibling. 4421// TODO: Recurse through a series of 'or' ops to find the rotate. 4440// If we are testing for all-bits-clear, we might be able to do that with 4441// less shifting since bit-order does not matter. 4455if (!ShAmtC || ShAmtC->getAPIntValue().uge(
BitWidth))
4458// Canonicalize fshr as fshl to reduce pattern-matching. 4459unsigned ShAmt = ShAmtC->getZExtValue();
4463// Match an 'or' with a specific operand 'Other' in either commuted variant. 4468if (
Or.getOperand(0) ==
Other) {
4473if (
Or.getOperand(1) ==
Other) {
4485if (matchOr(F0, F1)) {
4486// fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0 4492if (matchOr(F1, F0)) {
4493// fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0 4503/// Try to simplify a setcc built with the specified operands and cc. If it is 4504/// unable to simplify it, return a null SDValue. 4508constSDLoc &dl)
const{
4514// Constant fold or commute setcc. 4518bool N0ConstOrSplat =
4520bool N1ConstOrSplat =
4523// Canonicalize toward having the constant on the RHS. 4524// TODO: Handle non-splat vector constants. All undef causes trouble. 4525// FIXME: We can't yet fold constant scalable vector splats, so avoid an 4526// infinite loop here when we encounter one. 4528if (N0ConstOrSplat && !N1ConstOrSplat &&
4531return DAG.
getSetCC(dl, VT, N1, N0, SwappedCC);
4533// If we have a subtract with the same 2 non-constant operands as this setcc 4534// -- but in reverse order -- then try to commute the operands of this setcc 4535// to match. A matching pair of setcc (cmp) and sub may be combined into 1 4536// instruction on some targets. 4537if (!N0ConstOrSplat && !N1ConstOrSplat &&
4542return DAG.
getSetCC(dl, VT, N1, N0, SwappedCC);
4551constAPInt &C1 = N1C->getAPIntValue();
4553// Optimize some CTPOP cases. 4557// For equality to 0 of a no-wrap multiply, decompose and test each op: 4558// X * Y == 0 --> (X == 0) || (Y == 0) 4559// X * Y != 0 --> (X != 0) && (Y != 0) 4560// TODO: This bails out if minsize is set, but if the target doesn't have a 4561// single instruction multiply for this type, it would likely be 4562// smaller to decompose. 4571return DAG.
getNode(LogicOp, dl, VT, IsXZero, IsYZero);
4574// If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an 4575// equality comparison, then we're just comparing whether X itself is 4584// (srl (ctlz x), 5) == 0 -> X != 0 4585// (srl (ctlz x), 5) != 1 -> X != 0 4588// (srl (ctlz x), 5) != 0 -> X == 0 4589// (srl (ctlz x), 5) == 1 -> X == 0 4600// FIXME: Support vectors. 4601if (
auto *N1C = dyn_cast<ConstantSDNode>(N1.
getNode())) {
4602constAPInt &C1 = N1C->getAPIntValue();
4604// (zext x) == C --> x == (trunc C) 4605// (sext x) == C --> x == (trunc C) 4616// DAGCombine turns costly ZExts into ANDs 4617if (
auto *
C = dyn_cast<ConstantSDNode>(N0->
getOperand(1)))
4618if ((
C->getAPIntValue()+1).isPowerOf2()) {
4619 MinBits =
C->getAPIntValue().countr_one();
4627 }
elseif (
auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
4628// ZEXTLOAD / SEXTLOAD 4630 MinBits = LN0->getMemoryVT().getSizeInBits();
4634 MinBits = LN0->getMemoryVT().getSizeInBits();
4639// Figure out how many bits we need to preserve this constant. 4642// Make sure we're not losing bits from the constant. 4645 MinBits >= ReqdBits) {
4647if (isTypeDesirableForOp(
ISD::SETCC, MinVT)) {
4648// Will get folded away. 4650if (MinBits == 1 && C1 == 1)
4651// Invert the condition. 4658// If truncating the setcc operands is not desirable, we can still 4659// simplify the expression in some cases: 4660// setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc) 4661// setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc)) 4662// setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc)) 4663// setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc) 4664// setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc)) 4665// setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc) 4669if (TopSetCC.
getValueType() == MVT::i1 && VT == MVT::i1 &&
4682 cast<CondCodeSDNode>(TopSetCC.
getOperand(2))->get(),
4691// If the LHS is '(and load, const)', the RHS is 0, the test is for 4692// equality or unsigned, and all 1 bits of the const are in the same 4693// partial word, see if we can shorten the load. 4701auto *Lod = cast<LoadSDNode>(N0.
getOperand(0));
4703unsigned bestWidth = 0, bestOffset = 0;
4704if (Lod->isSimple() && Lod->isUnindexed() &&
4705 (Lod->getMemoryVT().isByteSized() ||
4707unsigned memWidth = Lod->getMemoryVT().getStoreSizeInBits();
4709unsigned maskWidth = origWidth;
4710// We can narrow (e.g.) 16-bit extending loads on 32-bit target to 4711// 8 bits, but have to be careful... 4713 origWidth = Lod->getMemoryVT().getSizeInBits();
4715// Only consider power-of-2 widths (and at least one byte) as candiates 4716// for the narrowed load. 4717for (
unsigned width = 8; width < origWidth; width *= 2) {
4722// Avoid accessing any padding here for now (we could use memWidth 4723// instead of origWidth here otherwise). 4724unsigned maxOffset = origWidth - width;
4725for (
unsigned offset = 0; offset <= maxOffset; offset += 8) {
4726if (Mask.isSubsetOf(newMask)) {
4732 *DAG.
getContext(), Layout, newVT, Lod->getAddressSpace(),
4733 NewAlign, Lod->getMemOperand()->getFlags(), &IsFast) &&
4735 bestOffset = ptrOffset / 8;
4736 bestMask = Mask.lshr(offset);
4754 Lod->getPointerInfo().getWithOffset(bestOffset),
4755 Lod->getOriginalAlign());
4763// If the LHS is a ZERO_EXTEND, perform the comparison on the input. 4767// If the comparison constant has bits in the upper part, the 4768// zero-extended value could never match. 4782// True if the sign bit of C1 is set. 4786// True if the sign bit of C1 isn't set. 4793// Otherwise, we can perform the comparison with the low bits. 4802// FIXME: Should use isNarrowingProfitable. 4817break;
// todo, be more careful with signed comparisons 4828// If the constant doesn't fit into the number of bits for the source of 4829// the sign extension, it is impossible for both sides to be equal. 4834 ExtDstTy != ExtSrcTy &&
"Unexpected types!");
4840// Otherwise, make this a use of a zext. 4843 }
elseif ((N1C->isZero() || N1C->isOne()) &&
4845// SETCC (X), [0|1], [EQ|NE] -> X if X is known 0/1. i1 types are 4846// excluded as they are handled below whilst checking for foldBooleans. 4856// Invert the condition. 4871// If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We 4872// can only do this if the top bits are known zero. 4877// Okay, get the un-inverted input value. 4884// ((X^1)&1)^1 -> X & 1 4890return DAG.
getSetCC(dl, VT, Val, N1,
4893 }
elseif (N1C->isOne()) {
4903// Ensure that the input setccs return an i1 type or 0/1 value. 4909// (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc) 4915// If this is (X&1) == / != 1, normalize it to (X&1) != / == 0. 4930 cast<VTSDNode>(Op0.
getOperand(1))->getVT() == MVT::i1)
4938// icmp eq/ne (urem %x, %y), 0 4939// Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem': 4949// Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0 4950// and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0 4961 optimizeSetCCOfSignedTruncationCheck(VT, N0, N1,
Cond, DCI, dl))
4965// These simplifications apply to splat vectors as well. 4966// TODO: Handle more splat vector cases. 4968constAPInt &C1 = N1C->getAPIntValue();
4970APInt MinVal, MaxVal;
4980// Canonicalize GE/LE comparisons to use GT/LT comparisons. 4986if (!VT.
isVector()) {
// TODO: Support this for vectors. 4987// X >= C0 --> X > (C0 - 1) 4992 (!N1C->isOpaque() || (
C.getBitWidth() <= 64 &&
5006// X <= C0 --> X < (C0 + 1) 5007if (!VT.
isVector()) {
// TODO: Support this for vectors. 5012 (!N1C->isOpaque() || (
C.getBitWidth() <= 64 &&
5025// TODO: Support this for vectors after legalize ops. 5027// Canonicalize setlt X, Max --> setne X, Max 5031// If we have setult X, 1, turn it into seteq X, 0 5043// TODO: Support this for vectors after legalize ops. 5045// Canonicalize setgt X, Min --> setne X, Min 5049// If we have setugt X, Max-1, turn it into seteq X, Max 5058// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0 5060if (
SDValueCC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
5061 VT, N0, N1,
Cond, DCI, dl))
5064// For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y). 5065// For example, when high 32-bits of i64 X are known clear: 5066// all bits clear: (X | (Y<<32)) == 0 --> (X | Y) == 0 5067// all bits set: (X | (Y<<32)) == -1 --> (X & Y) == -1 5068bool CmpZero = N1C->isZero();
5069bool CmpNegOne = N1C->isAllOnes();
5070if ((CmpZero || CmpNegOne) && N0.
hasOneUse()) {
5071// Match or(lo,shl(hi,bw/2)) pattern. 5073unsigned EltBits = V.getScalarValueSizeInBits();
5074if (V.getOpcode() !=
ISD::OR || (EltBits % 2) != 0)
5079// Unshifted element must have zero upperbits. 5081 isa<ConstantSDNode>(
RHS.getOperand(1)) &&
5082RHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5085Hi =
RHS.getOperand(0);
5089 isa<ConstantSDNode>(
LHS.getOperand(1)) &&
5090LHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5093Hi =
LHS.getOperand(0);
5101unsigned HalfBits = EltBits / 2;
5112if (IsConcat(N0,
Lo,
Hi))
5113return MergeConcat(
Lo,
Hi);
5126// If we have "setcc X, C0", check to see if we can shrink the immediate 5128// TODO: Support this for vectors after legalize ops. 5130// SETUGT X, SINTMAX -> SETLT X, 0 5131// SETUGE X, SINTMIN -> SETLT X, 0 5138// SETULT X, SINTMIN -> SETGT X, -1 5139// SETULE X, SINTMAX -> SETGT X, -1 5148// Back to non-vector simplifications. 5149// TODO: Can we do these for vector splats? 5150if (
auto *N1C = dyn_cast<ConstantSDNode>(N1.
getNode())) {
5151constAPInt &C1 = N1C->getAPIntValue();
5154// Fold bit comparisons when we can. This will result in an 5155// incorrect value when boolean false is negative one, unless 5156// the bitsize is 1 in which case the false value is the same 5157// in practice regardless of the representation. 5163if (
auto *AndRHS = dyn_cast<ConstantSDNode>(N0.
getOperand(1))) {
5164if (
Cond ==
ISD::SETNE && C1 == 0) {
// (X & 8) != 0 --> (X & 8) >> 3 5165// Perform the xform if the AND RHS is a single bit. 5166unsigned ShCt = AndRHS->getAPIntValue().logBase2();
5167if (AndRHS->getAPIntValue().isPowerOf2() &&
5175// (X & 8) == 8 --> (X & 8) >> 3 5176// Perform the xform if C1 is a single bit. 5190// (X & -256) == 256 -> (X >> 8) == 1 5193if (
auto *AndRHS = dyn_cast<ConstantSDNode>(N0.
getOperand(1))) {
5194constAPInt &AndRHSC = AndRHS->getAPIntValue();
5209// X < 0x100000000 -> (X >> 32) < 1 5210// X >= 0x100000000 -> (X >> 32) >= 1 5211// X <= 0x0ffffffff -> (X >> 32) < 1 5212// X > 0x0ffffffff -> (X >> 32) >= 1 5231return DAG.
getSetCC(dl, VT, Shift, CmpRHS, NewCond);
5237if (!isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1)) {
5238auto *CFP = cast<ConstantFPSDNode>(N1);
5239assert(!CFP->getValueAPF().isNaN() &&
"Unexpected NaN value");
5241// Otherwise, we know the RHS is not a NaN. Simplify the node to drop the 5242// constant if knowing that the operand is non-nan is enough. We prefer to 5243// have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to 5248// setcc (fneg x), C -> setcc swap(pred) x, -C 5258// setueq/setoeq X, (fabs Inf) -> is_fpclass X, fcInf 5260 !
isFPImmLegal(CFP->getValueAPF(), CFP->getValueType(0))) {
5273// If the condition is not legal, see if we can find an equivalent one 5276// If the comparison was an awkward floating-point == or != and one of 5277// the comparison operands is infinity or negative infinity, convert the 5278// condition to a less-awkward <= or >=. 5279if (CFP->getValueAPF().isInfinity()) {
5280bool IsNegInf = CFP->getValueAPF().isNegative();
5291return DAG.
getSetCC(dl, VT, N0, N1, NewCond);
5297// The sext(setcc()) => setcc() optimization relies on the appropriate 5298// constant being emitted. 5300"Integer types should be handled by FoldSetCC");
5304if (UOF == 2)
// FP operators that are undefined on NaNs. 5306if (UOF ==
unsigned(EqTrue))
5308// Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO 5309// if it is not already. 5311if (NewCond !=
Cond &&
5314return DAG.
getSetCC(dl, VT, N0, N1, NewCond);
5321if ((isSignedIntSetCC(
Cond) || isUnsignedIntSetCC(
Cond)) &&
5339// Simplify (X+Y) == (X+Z) --> Y == Z 5346// If X op Y == Y op X, try other combinations. 5356// If RHS is a legal immediate value for a compare instruction, we need 5357// to be careful about increasing register pressure needlessly. 5358bool LegalRHSImm =
false;
5360if (
auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
5361if (
auto *LHSR = dyn_cast<ConstantSDNode>(N0.
getOperand(1))) {
5362// Turn (X+C1) == C2 --> X == C2-C1 5366 DAG.
getConstant(RHSC->getAPIntValue() - LHSR->getAPIntValue(),
5370// Turn (X^C1) == C2 --> X == C1^C2 5374 DAG.
getConstant(LHSR->getAPIntValue() ^ RHSC->getAPIntValue(),
5379// Turn (C1-X) == C2 --> X == C1-C2 5380if (
auto *SUBC = dyn_cast<ConstantSDNode>(N0.
getOperand(0)))
5384 DAG.
getConstant(SUBC->getAPIntValue() - RHSC->getAPIntValue(),
5388// Could RHSC fold directly into a compare? 5389if (RHSC->getValueType(0).getSizeInBits() <= 64)
5393// (X+Y) == X --> Y == 0 and similar folds. 5394// Don't do this if X is an immediate that can fold into a cmp 5395// instruction and X+Y has other uses. It could be an induction variable 5396// chain, and the transform would increase register pressure. 5398if (
SDValue V = foldSetCCWithBinOp(VT, N0, N1,
Cond, dl, DCI))
5404if (
SDValue V = foldSetCCWithBinOp(VT, N1, N0,
Cond, dl, DCI))
5407if (
SDValue V = foldSetCCWithAnd(VT, N0, N1,
Cond, dl, DCI))
5411// Fold remainder of division by a constant. 5414// When division is cheap or optimizing for minimum size, 5415// fall through to DIVREM creation by skipping this fold. 5418if (
SDValue Folded = buildUREMEqFold(VT, N0, N1,
Cond, DCI, dl))
5421if (
SDValue Folded = buildSREMEqFold(VT, N0, N1,
Cond, DCI, dl))
5427// Fold away ALL boolean setcc's. 5434 N0 = DAG.
getNOT(dl, Temp, OpVT);
5441caseISD::SETGT:
// X >s Y --> X == 0 & Y == 1 --> ~X & Y 5442caseISD::SETULT:
// X <u Y --> X == 0 & Y == 1 --> ~X & Y 5443 Temp = DAG.
getNOT(dl, N0, OpVT);
5448caseISD::SETLT:
// X <s Y --> X == 1 & Y == 0 --> ~Y & X 5449caseISD::SETUGT:
// X >u Y --> X == 1 & Y == 0 --> ~Y & X 5450 Temp = DAG.
getNOT(dl, N1, OpVT);
5455caseISD::SETULE:
// X <=u Y --> X == 0 | Y == 1 --> ~X | Y 5456caseISD::SETGE:
// X >=s Y --> X == 0 | Y == 1 --> ~X | Y 5457 Temp = DAG.
getNOT(dl, N0, OpVT);
5462caseISD::SETUGE:
// X >=u Y --> X == 1 | Y == 0 --> ~Y | X 5463caseISD::SETLE:
// X <=s Y --> X == 1 | Y == 0 --> ~Y | X 5464 Temp = DAG.
getNOT(dl, N1, OpVT);
5471// FIXME: If running after legalize, we probably can't do this. 5473 N0 = DAG.
getNode(ExtendCode, dl, VT, N0);
5478// Could not fold it. 5482/// Returns true (and the GlobalValue and the offset) if the node is a 5483/// GlobalAddress + offset. 5489if (
auto *GASD = dyn_cast<GlobalAddressSDNode>(
N)) {
5490 GA = GASD->getGlobal();
5491Offset += GASD->getOffset();
5499if (
auto *V = dyn_cast<ConstantSDNode>(N2)) {
5500Offset += V->getSExtValue();
5504if (
auto *V = dyn_cast<ConstantSDNode>(N1)) {
5505Offset += V->getSExtValue();
5516// Default implementation: no optimization. 5520//===----------------------------------------------------------------------===// 5521// Inline Assembler Implementation Methods 5522//===----------------------------------------------------------------------===// 5526unsigned S = Constraint.
size();
5529switch (Constraint[0]) {
5532return C_RegisterClass;
5534case'o':
// offsetable 5535case'V':
// not offsetable 5539case'n':
// Simple Integer 5540case'E':
// Floating Point Constant 5541case'F':
// Floating Point Constant 5543case'i':
// Simple Integer or Relocatable Constant 5544case's':
// Relocatable Constant 5545case'X':
// Allow ANY value. 5546case'I':
// Target registers. 5560if (S > 1 && Constraint[0] ==
'{' && Constraint[S - 1] ==
'}') {
5561if (S == 8 && Constraint.
substr(1, 6) ==
"memory")
// "{memory}" 5568/// Try to replace an X constraint, which matches anything, with another that 5569/// has more specific requirements based on the type of the corresponding 5575return"f";
// works for many targets 5585/// Lower the specified operand into the Ops vector. 5586/// If it is invalid, don't add anything to Ops. 5589 std::vector<SDValue> &Ops,
5592if (Constraint.
size() > 1)
5595char ConstraintLetter = Constraint[0];
5596switch (ConstraintLetter) {
5598case'X':
// Allows any operand 5599case'i':
// Simple Integer or Relocatable Constant 5600case'n':
// Simple Integer 5601case's': {
// Relocatable Constant 5606// Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C), 5607// etc., since getelementpointer is variadic. We can't use 5608// SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible 5609// while in this case the GA may be furthest from the root node which is 5610// likely an ISD::ADD. 5612if ((
C = dyn_cast<ConstantSDNode>(
Op)) && ConstraintLetter !=
's') {
5613// gcc prints these as sign extended. Sign extend value to 64 bits 5614// now; without this it would get ZExt'd later in 5615// ScheduleDAGSDNodes::EmitNode, which is very generic. 5616bool IsBool =
C->getConstantIntValue()->getBitWidth() == 1;
5626if (ConstraintLetter !=
'n') {
5627if (
constauto *GA = dyn_cast<GlobalAddressSDNode>(
Op)) {
5629 GA->getValueType(0),
5630Offset + GA->getOffset()));
5633if (
constauto *BA = dyn_cast<BlockAddressSDNode>(
Op)) {
5635 BA->getBlockAddress(), BA->getValueType(0),
5636Offset + BA->getOffset(), BA->getTargetFlags()));
5639if (isa<BasicBlockSDNode>(
Op)) {
5644constunsigned OpCode =
Op.getOpcode();
5646if ((
C = dyn_cast<ConstantSDNode>(
Op.getOperand(0))))
5647Op =
Op.getOperand(1);
5648// Subtraction is not commutative. 5650 (
C = dyn_cast<ConstantSDNode>(
Op.getOperand(1))))
5651Op =
Op.getOperand(0);
5668std::pair<unsigned, const TargetRegisterClass *>
5674assert(*(Constraint.
end() - 1) ==
'}' &&
"Not a brace enclosed constraint?");
5676// Remove the braces from around the name. 5679 std::pair<unsigned, const TargetRegisterClass *> R =
5682// Figure out which register class contains this reg. 5684// If none of the value types for this register class are valid, we 5685// can't use it. For example, 64-bit reg classes on 32-bit targets. 5691 std::pair<unsigned, const TargetRegisterClass *> S =
5692 std::make_pair(PR, RC);
5694// If this register class has the requested value type, return it, 5695// otherwise keep searching and return the first class found 5696// if no other is found which explicitly has the requested type. 5708//===----------------------------------------------------------------------===// 5709// Constraint Selection. 5711/// Return true of this is an input operand that is a matching constraint like 5714assert(!ConstraintCode.empty() &&
"No known constraint!");
5715return isdigit(
static_cast<unsignedchar>(ConstraintCode[0]));
5718/// If this is an input matching constraint, this method returns the output 5719/// operand it matches. 5721assert(!ConstraintCode.empty() &&
"No known constraint!");
5722return atoi(ConstraintCode.c_str());
5725/// Split up the constraint string from the inline assembly value into the 5726/// specific constraints and their prefixes, and also tie in the associated 5728/// If this returns an empty vector, and if the constraint string itself 5729/// isn't empty, there was an error parsing. 5734 /// Information about all of the constraints. 5736constInlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
5737unsigned maCount = 0;
// Largest number of multiple alternative constraints. 5739// Do a prepass over the constraints, canonicalizing them, and building up the 5740// ConstraintOperands list. 5741unsigned ArgNo = 0;
// ArgNo - The argument of the CallInst. 5742unsigned ResNo = 0;
// ResNo - The result number of the next output. 5743unsigned LabelNo = 0;
// LabelNo - CallBr indirect dest number. 5746 ConstraintOperands.emplace_back(std::move(CI));
5749// Update multiple alternative constraint count. 5755// Compute the value type for each operand. 5756switch (OpInfo.
Type) {
5758// Indirect outputs just consume an argument. 5764// The return value of the call is this value. As such, there is no 5765// corresponding argument. 5766assert(!Call.getType()->isVoidTy() &&
"Bad inline asm!");
5767if (
auto *STy = dyn_cast<StructType>(Call.getType())) {
5772assert(ResNo == 0 &&
"Asm only has one result!");
5782 OpInfo.
CallOperandVal = cast<CallBrInst>(&Call)->getIndirectDest(LabelNo);
5793 OpTy = Call.getParamElementType(ArgNo);
5794assert(OpTy &&
"Indirect operand must have elementtype attribute");
5797// Look for vector wrapped in a struct. e.g. { <16 x i8> }. 5798if (
StructType *STy = dyn_cast<StructType>(OpTy))
5799if (STy->getNumElements() == 1)
5800 OpTy = STy->getElementType(0);
5802// If OpTy is not a single value, it may be a struct/union that we 5803// can tile with integers. 5805unsigned BitSize =
DL.getTypeSizeInBits(OpTy);
5825// If we have multiple alternative constraints, select the best alternative. 5826if (!ConstraintOperands.empty()) {
5828unsigned bestMAIndex = 0;
5830// weight: -1 = invalid match, and 0 = so-so match to 5 = good match. 5833// Compute the sums of the weights for each alternative, keeping track 5834// of the best (highest weight) one so far. 5835for (maIndex = 0; maIndex < maCount; ++maIndex) {
5837for (
unsigned cIndex = 0, eIndex = ConstraintOperands.size();
5838 cIndex != eIndex; ++cIndex) {
5843// If this is an output operand with a matching input operand, 5844// look up the matching input. If their types mismatch, e.g. one 5845// is an integer, the other is floating point, or their sizes are 5846// different, flag it as an maCantMatch. 5854 weightSum = -1;
// Can't match. 5859 weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
5864 weightSum += weight;
5867if (weightSum > bestWeight) {
5868 bestWeight = weightSum;
5869 bestMAIndex = maIndex;
5873// Now select chosen alternative in each constraint. 5876 cInfo.selectAlternative(bestMAIndex);
5880// Check and hook up tied operands, choose constraint code to use. 5881for (
unsigned cIndex = 0, eIndex = ConstraintOperands.size();
5882 cIndex != eIndex; ++cIndex) {
5885// If this is an output operand with a matching input operand, look up the 5886// matching input. If their types mismatch, e.g. one is an integer, the 5887// other is floating point, or their sizes are different, flag it as an 5893 std::pair<unsigned, const TargetRegisterClass *> MatchRC =
5896 std::pair<unsigned, const TargetRegisterClass *> InputRC =
5903if ((OutOpIsIntOrFP != InOpIsIntOrFP) ||
5904 (MatchRC.second != InputRC.second)) {
5906" with a matching output constraint of" 5907" incompatible type!");
5913return ConstraintOperands;
5916/// Return a number indicating our preference for chosing a type of constraint 5917/// over another, for the purpose of sorting them. Immediates are almost always 5918/// preferrable (when they can be emitted). A higher return value means a 5919/// stronger preference for one constraint type relative to another. 5920/// FIXME: We should prefer registers over memory but doing so may lead to 5921/// unrecoverable register exhaustion later. 5922/// https://github.com/llvm/llvm-project/issues/20571 5941/// Examine constraint type and operand type and determine a weight value. 5942/// This object must already have been set up with the operand type 5943/// and the current alternative constraint selected. 5948if (maIndex >= (
int)
info.multipleAlternatives.size())
5949 rCodes = &
info.Codes;
5951 rCodes = &
info.multipleAlternatives[maIndex].Codes;
5954// Loop over the options, keeping track of the most general one. 5955for (
const std::string &rCode : *rCodes) {
5957 getSingleConstraintMatchWeight(
info, rCode.c_str());
5958if (weight > BestWeight)
5959 BestWeight = weight;
5965/// Examine constraint type and operand type and determine a weight value. 5966/// This object must already have been set up with the operand type 5967/// and the current alternative constraint selected. 5972Value *CallOperandVal =
info.CallOperandVal;
5973// If we don't have a value, we can't do a match, 5974// but allow it at the lowest weight. 5977// Look at the constraint type. 5978switch (*constraint) {
5979case'i':
// immediate integer. 5980case'n':
// immediate integer with a known value. 5981if (isa<ConstantInt>(CallOperandVal))
5982 weight = CW_Constant;
5984case's':
// non-explicit intregal immediate. 5985if (isa<GlobalValue>(CallOperandVal))
5986 weight = CW_Constant;
5988case'E':
// immediate float if host format. 5989case'F':
// immediate float. 5990if (isa<ConstantFP>(CallOperandVal))
5991 weight = CW_Constant;
5993case'<':
// memory operand with autodecrement. 5994case'>':
// memory operand with autoincrement. 5995case'm':
// memory operand. 5996case'o':
// offsettable memory operand 5997case'V':
// non-offsettable memory operand 6000case'r':
// general register. 6001case'g':
// general register, memory operand or immediate integer. 6002// note: Clang converts "g" to "imr". 6004 weight = CW_Register;
6006case'X':
// any operand. 6008 weight = CW_Default;
6014/// If there are multiple different constraints that we could pick for this 6015/// operand (e.g. "imr") try to pick the 'best' one. 6016/// This is somewhat tricky: constraints (TargetLowering::ConstraintType) fall 6017/// into seven classes: 6018/// Register -> one specific register 6019/// RegisterClass -> a group of regs 6021/// Address -> a symbolic memory reference 6022/// Immediate -> immediate values 6023/// Other -> magic values (such as "Flag Output Operands") 6024/// Unknown -> something we don't recognize yet and can't handle 6025/// Ideally, we would pick the most specific constraint possible: if we have 6026/// something that fits into a register, we would pick it. The problem here 6027/// is that if we have something that could either be in a register or in 6028/// memory that use of the register could cause selection of *other* 6029/// operands to fail: they might only succeed if we pick memory. Because of 6030/// this the heuristic we use is: 6032/// 1) If there is an 'other' constraint, and if the operand is valid for 6033/// that constraint, use it. This makes us take advantage of 'i' 6034/// constraints when available. 6035/// 2) Otherwise, pick the most general constraint present. This prefers 6036/// 'm' over 'r', for example. 6042 Ret.reserve(OpInfo.
Codes.size());
6046// Indirect 'other' or 'immediate' constraints are not allowed. 6052// Things with matching constraints can only be registers, per gcc 6053// documentation. This mainly affects "g" constraints. 6057 Ret.emplace_back(Code, CType);
6062 return getConstraintPiority(a.second) > getConstraintPiority(b.second);
6068/// If we have an immediate, see if we can lower it. Return true if we can, 6076"need immediate or other");
6081 std::vector<SDValue> ResultOps;
6083return !ResultOps.empty();
6086/// Determines the constraint code and constraint type to use for the specific 6087/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType. 6091assert(!OpInfo.
Codes.empty() &&
"Must have at least one constraint");
6093// Single-letter constraints ('r') are very common. 6094if (OpInfo.
Codes.size() == 1) {
6102unsigned BestIdx = 0;
6103for (
constunsigned E =
G.size();
6109// If we're out of constraints, just pick the first one. 6110if (BestIdx + 1 == E) {
6120// 'X' matches anything. 6122// Constants are handled elsewhere. For Functions, the type here is the 6123// type of the result, which is not what we want to look at; leave them 6126if (isa<ConstantInt>(v) || isa<Function>(v)) {
6130if (isa<BasicBlock>(v) || isa<BlockAddress>(v)) {
6135// Otherwise, try to resolve it to something we know about by looking at 6136// the actual operand type. 6137if (
constchar *Repl = LowerXConstraint(OpInfo.
ConstraintVT)) {
6144/// Given an exact SDIV by a constant, create a multiplication 6145/// with the multiplicative inverse of the constant. 6146/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242 6152EVT VT =
N->getValueType(0);
6163APInt Divisor =
C->getAPIntValue();
6175// Collect all magic values from the build vector. 6185"Expected matchUnaryPredicate to return one element for scalable " 6190assert(isa<ConstantSDNode>(Op1) &&
"Expected a constant");
6192 Factor = Factors[0];
6204/// Given an exact UDIV by a constant, create a multiplication 6205/// with the multiplicative inverse of the constant. 6206/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242 6210EVT VT =
N->getValueType(0);
6221APInt Divisor =
C->getAPIntValue();
6227// Calculate the multiplicative inverse modulo BW. 6236// Collect all magic values from the build vector. 6246"Expected matchUnaryPredicate to return one element for scalable " 6251assert(isa<ConstantSDNode>(Op1) &&
"Expected a constant");
6253 Factor = Factors[0];
6270returnSDValue(
N, 0);
// Lower SDIV as SDIV 6280returnSDValue(
N, 0);
// Lower SREM as SREM 6284/// Build sdiv by power-of-2 with conditional move instructions 6285/// Ref: "Hacker's Delight" by Henry Warren 10-1 6286/// If conditional move/branch is preferred, we lower sdiv x, +/-2**k into: 6291/// neg res, res (when the divisor is negative) 6296EVT VT =
N->getValueType(0);
6304// If N0 is negative, we need to add (Pow2 - 1) to it before shifting right. 6318// If we're dividing by a positive value, we're done. Otherwise, we must 6319// negate the result. 6327/// Given an ISD::SDIV node expressing a divide by constant, 6328/// return a DAG expression to select that will generate the same value by 6329/// multiplying by a magic number. 6330/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". 6332bool IsAfterLegalization,
6333bool IsAfterLegalTypes,
6336EVT VT =
N->getValueType(0);
6343// Check to see if we can do this. 6344// FIXME: We should be more aggressive here. 6346// Limit this to simple scalars for now. 6350// If this type will be promoted to a large enough type with a legal 6351// multiply operation, we can go ahead and do this transform. 6361// If the sdiv has an 'exact' bit we can use a simpler lowering. 6362if (
N->getFlags().hasExact())
6371constAPInt &Divisor =
C->getAPIntValue();
6373int NumeratorFactor = 0;
6377// If d is +1/-1, we just multiply the numerator by +1/-1. 6383// If d > 0 and m < 0, add the numerator. 6384 NumeratorFactor = 1;
6386// If d < 0 and m > 0, subtract the numerator. 6387 NumeratorFactor = -1;
6400// Collect the shifts / magic values from each element. 6404SDValue MagicFactor, Factor, Shift, ShiftMask;
6412 Shifts.
size() == 1 && ShiftMasks.
size() == 1 &&
6413"Expected matchUnaryPredicate to return one element for scalable " 6420assert(isa<ConstantSDNode>(N1) &&
"Expected a constant");
6421 MagicFactor = MagicFactors[0];
6422 Factor = Factors[0];
6424 ShiftMask = ShiftMasks[0];
6427// Multiply the numerator (operand 0) by the magic value. 6428// FIXME: We should support doing a MUL in a wider type. 6430// If the type isn't legal, use a wider mul of the type calculated 6448// If type twice as wide legal, widen and use a mul plus a shift. 6454// Some targets like AMDGPU try to go from SDIV to SDIVREM which is then 6455// custom lowered. This is very expensive so avoid it at all costs for 6456// constant divisors. 6470SDValue Q = GetMULHS(N0, MagicFactor);
6476// (Optionally) Add/subtract the numerator using Factor. 6482// Shift right algebraic by shift value. 6486// Extract the sign bit, mask it and add it to the quotient. 6495/// Given an ISD::UDIV node expressing a divide by constant, 6496/// return a DAG expression to select that will generate the same value by 6497/// multiplying by a magic number. 6498/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". 6500bool IsAfterLegalization,
6501bool IsAfterLegalTypes,
6504EVT VT =
N->getValueType(0);
6511// Check to see if we can do this. 6512// FIXME: We should be more aggressive here. 6514// Limit this to simple scalars for now. 6518// If this type will be promoted to a large enough type with a legal 6519// multiply operation, we can go ahead and do this transform. 6529// If the udiv has an 'exact' bit we can use a simpler lowering. 6530if (
N->getFlags().hasExact())
6536// Try to use leading zeros of the dividend to reduce the multiplier and 6537// avoid expensive fixups. 6540bool UseNPQ =
false, UsePreShift =
false, UsePostShift =
false;
6546constAPInt& Divisor =
C->getAPIntValue();
6548SDValue PreShift, MagicFactor, NPQFactor, PostShift;
6550// Magic algorithm doesn't work for division by 1. We need to emit a select 6552if (Divisor.
isOne()) {
6553 PreShift = PostShift = DAG.
getUNDEF(ShSVT);
6554 MagicFactor = NPQFactor = DAG.
getUNDEF(SVT);
6558 Divisor, std::min(KnownLeadingZeros, Divisor.
countl_zero()));
6563"We shouldn't generate an undefined shift!");
6565"We shouldn't generate an undefined shift!");
6567"Unexpected pre-shift");
6574 UseNPQ |= magics.
IsAdd;
6575 UsePreShift |= magics.
PreShift != 0;
6586// Collect the shifts/magic values from each element. 6590SDValue PreShift, PostShift, MagicFactor, NPQFactor;
6598 NPQFactors.
size() == 1 && PostShifts.
size() == 1 &&
6599"Expected matchUnaryPredicate to return one for scalable vectors");
6605assert(isa<ConstantSDNode>(N1) &&
"Expected a constant");
6606 PreShift = PreShifts[0];
6607 MagicFactor = MagicFactors[0];
6608 PostShift = PostShifts[0];
6617// FIXME: We should support doing a MUL in a wider type. 6619// If the type isn't legal, use a wider mul of the type calculated 6637// If type twice as wide legal, widen and use a mul plus a shift. 6643// Some targets like AMDGPU try to go from UDIV to UDIVREM which is then 6644// custom lowered. This is very expensive so avoid it at all costs for 6645// constant divisors. 6656returnSDValue();
// No mulhu or equivalent 6659// Multiply the numerator (operand 0) by the magic value. 6660 Q = GetMULHU(Q, MagicFactor);
6670// For vectors we might have a mix of non-NPQ/NPQ paths, so use 6671// MULHU to act as a SRL-by-1 for NPQ, else multiply by zero. 6673 NPQ = GetMULHU(NPQ, NPQFactor);
6692return DAG.
getSelect(dl, VT, IsOne, N0, Q);
6695/// If all values in Values that *don't* match the predicate are same 'splat' 6696/// value, then replace all values with that splat value. 6697/// Else, if AlternativeReplacement was provided, then replace all values that 6698/// do match predicate with AlternativeReplacement value. 6704// Is there a value for which the Predicate does *NOT* match? What is it? 6706if (SplatValue != Values.
end()) {
6707// Does Values consist only of SplatValue's and values matching Predicate? 6710 }))
// Then we shall replace values matching predicate with SplatValue. 6711 Replacement = *SplatValue;
6714// Oops, we did not find the "baseline" splat value. 6715if (!AlternativeReplacement)
6716return;
// Nothing to do. 6717// Let's replace with provided value then. 6718 Replacement = AlternativeReplacement;
6723/// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE 6724/// where the divisor is constant and the comparison target is zero, 6725/// return a DAG expression that will generate the same comparison result 6726/// using only multiplications, additions and shifts/rotations. 6727/// Ref: "Hacker's Delight" 10-17. 6731 DAGCombinerInfo &DCI,
6734if (
SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode,
Cond,
6737 DCI.AddToWorklist(
N);
6745TargetLowering::prepareUREMEqFold(
EVT SETCCVT,
SDValue REMNode,
6747 DAGCombinerInfo &DCI,
constSDLoc &
DL,
6749// fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q) 6750// - D must be constant, with D = D0 * 2^K where D0 is odd 6751// - P is the multiplicative inverse of D0 modulo 2^W 6752// - Q = floor(((2^W) - 1) / D) 6753// where W is the width of the common type of N and D. 6755"Only applicable for (in)equality comparisons.");
6764// If MUL is unavailable, we cannot proceed in any case. 6768bool ComparingWithAllZeros =
true;
6769bool AllComparisonsWithNonZerosAreTautological =
true;
6770bool HadTautologicalLanes =
false;
6771bool AllLanesAreTautological =
true;
6772bool HadEvenDivisor =
false;
6773bool AllDivisorsArePowerOfTwo =
true;
6774bool HadTautologicalInvertedLanes =
false;
6778// Division by 0 is UB. Leave it to be constant-folded elsewhere. 6783constAPInt &
Cmp = CCmp->getAPIntValue();
6785 ComparingWithAllZeros &=
Cmp.isZero();
6787// x u% C1` is *always* less than C1. So given `x u% C1 == C2`, 6788// if C2 is not less than C1, the comparison is always false. 6789// But we will only be able to produce the comparison that will give the 6790// opposive tautological answer. So this lane would need to be fixed up. 6791bool TautologicalInvertedLane =
D.ule(Cmp);
6792 HadTautologicalInvertedLanes |= TautologicalInvertedLane;
6794// If all lanes are tautological (either all divisors are ones, or divisor 6795// is not greater than the constant we are comparing with), 6796// we will prefer to avoid the fold. 6797bool TautologicalLane =
D.isOne() || TautologicalInvertedLane;
6798 HadTautologicalLanes |= TautologicalLane;
6799 AllLanesAreTautological &= TautologicalLane;
6801// If we are comparing with non-zero, we need'll need to subtract said 6802// comparison value from the LHS. But there is no point in doing that if 6803// every lane where we are comparing with non-zero is tautological.. 6805 AllComparisonsWithNonZerosAreTautological &= TautologicalLane;
6807// Decompose D into D0 * 2^K 6808unsignedK =
D.countr_zero();
6809assert((!
D.isOne() || (K == 0)) &&
"For divisor '1' we won't rotate.");
6812// D is even if it has trailing zeros. 6813 HadEvenDivisor |= (
K != 0);
6814// D is a power-of-two if D0 is one. 6815// If all divisors are power-of-two, we will prefer to avoid the fold. 6816 AllDivisorsArePowerOfTwo &= D0.
isOne();
6819// 2^W requires W + 1 bits, so we have to extend and then truncate. 6820unsignedW =
D.getBitWidth();
6822assert((D0 *
P).isOne() &&
"Multiplicative inverse basic check failed.");
6824// Q = floor((2^W - 1) u/ D) 6825// R = ((2^W - 1) u% D) 6829// If we are comparing with zero, then that comparison constant is okay, 6830// else it may need to be one less than that. 6835"We are expecting that K is always less than all-ones for ShSVT");
6837// If the lane is tautological the result can be constant-folded. 6838if (TautologicalLane) {
6839// Set P and K amount to a bogus values so we can try to splat them. 6842// And ensure that comparison constant is tautological, 6843// it will always compare true/false. 6850/*implicitTrunc=*/true),
6859// Collect the values from each element. 6863// If all lanes are tautological, the result can be constant-folded. 6864if (AllLanesAreTautological)
6867// If this is a urem by a powers-of-two, avoid the fold since it can be 6868// best implemented as a bit test. 6869if (AllDivisorsArePowerOfTwo)
6874if (HadTautologicalLanes) {
6875// Try to turn PAmts into a splat, since we don't care about the values 6876// that are currently '0'. If we can't, just keep '0'`s. 6878// Try to turn KAmts into a splat, since we don't care about the values 6879// that are currently '-1'. If we can't, change them to '0'`s. 6889"Expected matchBinaryPredicate to return one element for " 6900if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
6902returnSDValue();
// FIXME: Could/should use `ISD::ADD`? 6904"Expecting that the types on LHS and RHS of comparisons match.");
6912// Rotate right only if any divisor was even. We avoid rotates for all-odd 6913// divisors as a performance improvement, since rotating by 0 is a no-op. 6914if (HadEvenDivisor) {
6915// We need ROTR to do this. 6918// UREM: (rotr (mul N, P), K) 6923// UREM: (setule/setugt (rotr (mul N, P), K), Q) 6927if (!HadTautologicalInvertedLanes)
6930// If any lanes previously compared always-false, the NewCC will give 6931// always-true result for them, so we need to fixup those lanes. 6932// Or the other way around for inequality predicate. 6936// x u% C1` is *always* less than C1. So given `x u% C1 == C2`, 6937// if C2 is not less than C1, the comparison is always false. 6938// But we have produced the comparison that will give the 6939// opposive tautological answer. So these lanes would need to be fixed up. 6940SDValue TautologicalInvertedChannels =
6944// NOTE: we avoid letting illegal types through even if we're before legalize 6945// ops – legalization has a hard time producing good code for this. 6947// If we have a vector select, let's replace the comparison results in the 6948// affected lanes with the correct tautological result. 6950DL, SETCCVT, SETCCVT);
6952 Replacement, NewCC);
6955// Else, we can just invert the comparison result in the appropriate lanes. 6957// NOTE: see the note above VSELECT above. 6960 TautologicalInvertedChannels);
6962returnSDValue();
// Don't know how to lower. 6965/// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE 6966/// where the divisor is constant and the comparison target is zero, 6967/// return a DAG expression that will generate the same comparison result 6968/// using only multiplications, additions and shifts/rotations. 6969/// Ref: "Hacker's Delight" 10-17. 6973 DAGCombinerInfo &DCI,
6976if (
SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode,
Cond,
6978assert(Built.
size() <= 7 &&
"Max size prediction failed.");
6980 DCI.AddToWorklist(
N);
6988TargetLowering::prepareSREMEqFold(
EVT SETCCVT,
SDValue REMNode,
6990 DAGCombinerInfo &DCI,
constSDLoc &
DL,
6992// Derived from Hacker's Delight, 2nd Edition, by Hank Warren. Section 10-17. 6994// (seteq/ne (srem N, D), 0) 6996// (setule/ugt (rotr (add (mul N, P), A), K), Q) 6998// - D must be constant, with D = D0 * 2^K where D0 is odd 6999// - P is the multiplicative inverse of D0 modulo 2^W 7000// - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k))) 7001// - Q = floor((2 * A) / (2^K)) 7002// where W is the width of the common type of N and D. 7004// When D is a power of two (and thus D0 is 1), the normal 7005// formula for A and Q don't apply, because the derivation 7006// depends on D not dividing 2^(W-1), and thus theorem ZRS 7007// does not apply. This specifically fails when N = INT_MIN. 7009// Instead, for power-of-two D, we use: 7011// |-> Order-preserving map from [-2^(W-1), 2^(W-1) - 1] to [0,2^W - 1]) 7013// |-> Test that the top K bits are zero after rotation 7015"Only applicable for (in)equality comparisons.");
7024// If we are after ops legalization, and MUL is unavailable, we can not 7029// TODO: Could support comparing with non-zero too. 7031if (!CompTarget || !CompTarget->
isZero())
7034bool HadIntMinDivisor =
false;
7035bool HadOneDivisor =
false;
7036bool AllDivisorsAreOnes =
true;
7037bool HadEvenDivisor =
false;
7038bool NeedToApplyOffset =
false;
7039bool AllDivisorsArePowerOfTwo =
true;
7043// Division by 0 is UB. Leave it to be constant-folded elsewhere. 7047// FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine. 7049// WARNING: this fold is only valid for positive divisors! 7052D.negate();
// `rem %X, -C` is equivalent to `rem %X, C` 7054 HadIntMinDivisor |=
D.isMinSignedValue();
7056// If all divisors are ones, we will prefer to avoid the fold. 7057 HadOneDivisor |=
D.isOne();
7058 AllDivisorsAreOnes &=
D.isOne();
7060// Decompose D into D0 * 2^K 7061unsignedK =
D.countr_zero();
7062assert((!
D.isOne() || (K == 0)) &&
"For divisor '1' we won't rotate.");
7065if (!
D.isMinSignedValue()) {
7066// D is even if it has trailing zeros; unless it's INT_MIN, in which case 7067// we don't care about this lane in this fold, we'll special-handle it. 7068 HadEvenDivisor |= (
K != 0);
7071// D is a power-of-two if D0 is one. This includes INT_MIN. 7072// If all divisors are power-of-two, we will prefer to avoid the fold. 7073 AllDivisorsArePowerOfTwo &= D0.
isOne();
7076// 2^W requires W + 1 bits, so we have to extend and then truncate. 7077unsignedW =
D.getBitWidth();
7079assert((D0 *
P).isOne() &&
"Multiplicative inverse basic check failed.");
7081// A = floor((2^(W - 1) - 1) / D0) & -2^K 7085if (!
D.isMinSignedValue()) {
7086// If divisor INT_MIN, then we don't care about this lane in this fold, 7087// we'll special-handle it. 7088 NeedToApplyOffset |=
A != 0;
7091// Q = floor((2 * A) / (2^K)) 7095"We are expecting that A is always less than all-ones for SVT");
7097"We are expecting that K is always less than all-ones for ShSVT");
7099// If D was a power of two, apply the alternate constant derivation. 7107// If the divisor is 1 the result can be constant-folded. Likewise, we 7108// don't care about INT_MIN lanes, those can be set to undef if appropriate. 7110// Set P, A and K to a bogus values so we can try to splat them. 7115// x ?% 1 == 0 <--> true <--> x u<= -1 7123/*implicitTrunc=*/true),
7132// Collect the values from each element. 7136// If this is a srem by a one, avoid the fold since it can be constant-folded. 7137if (AllDivisorsAreOnes)
7140// If this is a srem by a powers-of-two (including INT_MIN), avoid the fold 7141// since it can be best implemented as a bit test. 7142if (AllDivisorsArePowerOfTwo)
7145SDValue PVal, AVal, KVal, QVal;
7148// Try to turn PAmts into a splat, since we don't care about the values 7149// that are currently '0'. If we can't, just keep '0'`s. 7151// Try to turn AAmts into a splat, since we don't care about the 7152// values that are currently '-1'. If we can't, change them to '0'`s. 7155// Try to turn KAmts into a splat, since we don't care about the values 7156// that are currently '-1'. If we can't, change them to '0'`s. 7167 QAmts.
size() == 1 &&
7168"Expected matchUnaryPredicate to return one element for scalable " 7175assert(isa<ConstantSDNode>(
D) &&
"Expected a constant");
7186if (NeedToApplyOffset) {
7187// We need ADD to do this. 7191// (add (mul N, P), A) 7196// Rotate right only if any divisor was even. We avoid rotates for all-odd 7197// divisors as a performance improvement, since rotating by 0 is a no-op. 7198if (HadEvenDivisor) {
7199// We need ROTR to do this. 7202// SREM: (rotr (add (mul N, P), A), K) 7207// SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q) 7212// If we didn't have lanes with INT_MIN divisor, then we're done. 7213if (!HadIntMinDivisor)
7216// That fold is only valid for positive divisors. Which effectively means, 7217// it is invalid for INT_MIN divisors. So if we have such a lane, 7218// we must fix-up results for said lanes. 7221// NOTE: we avoid letting illegal types through even if we're before legalize 7222// ops – legalization has a hard time producing good code for the code that 7239// Which lanes had INT_MIN divisors? Divisor is constant, so const-folded. 7243// (N s% INT_MIN) ==/!= 0 <--> (N & INT_MAX) ==/!= 0 7249// To produce final result we need to blend 2 vectors: 'SetCC' and 7250// 'MaskedIsZero'. If the divisor for channel was *NOT* INT_MIN, we pick 7251// from 'Fold', else pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is 7252// constant-folded, select can get lowered to a shuffle with constant mask. 7254 MaskedIsZero, Fold);
7261if (!isa<ConstantSDNode>(
Op.getOperand(0))) {
7263"be a constant integer");
7273EVT VT =
Op.getValueType();
7277// This is specifically a check for the handling of denormal inputs, not the 7285// Testing it with denormal inputs to avoid wrong estimate. 7287// Test = fabs(X) < SmallestNormal 7296bool LegalOps,
bool OptForSize,
7298unsignedDepth)
const{
7299// fneg is removable even if it has multiple uses. 7300if (
Op.getOpcode() ==
ISD::FNEG ||
Op.getOpcode() == ISD::VP_FNEG) {
7302returnOp.getOperand(0);
7305// Don't recurse exponentially. 7309// Pre-increment recursion depth for use in recursive calls. 7313EVT VT =
Op.getValueType();
7314unsigned Opcode =
Op.getOpcode();
7316// Don't allow anything with multiple uses unless we know it is free. 7324auto RemoveDeadNode = [&](
SDValueN) {
7325if (
N &&
N.getNode()->use_empty())
7331// Because getNegatedExpression can delete nodes we need a handle to keep 7332// temporary nodes alive in case the recursion manages to create an identical 7334 std::list<HandleSDNode> Handles;
7338// Don't invert constant FP values after legalization unless the target says 7339// the negated constant is legal. 7345if (LegalOps && !IsOpLegal)
7348APFloat V = cast<ConstantFPSDNode>(
Op)->getValueAPF();
7352// If we already have the use of the negated floating constant, it is free 7353// to negate it even it has multiple uses. 7360// Only permit BUILD_VECTOR of constants. 7362 return !N.isUndef() && !isa<ConstantFPSDNode>(N);
7375if (LegalOps && !IsOpLegal)
7384APFloat V = cast<ConstantFPSDNode>(
C)->getValueAPF();
7392if (!
Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
7395// After operation legalization, it might not be legal to create new FSUBs. 7400// fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y) 7403 getNegatedExpression(
X, DAG, LegalOps, OptForSize, CostX,
Depth);
7404// Prevent this node from being deleted by the next call. 7406 Handles.emplace_back(NegX);
7408// fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X) 7411 getNegatedExpression(
Y, DAG, LegalOps, OptForSize, CostY,
Depth);
7413// We're done with the handles. 7416// Negate the X if its cost is less or equal than Y. 7417if (NegX && (CostX <= CostY)) {
7421 RemoveDeadNode(NegY);
7425// Negate the Y if it is not expensive. 7430 RemoveDeadNode(NegX);
7436// We can't turn -(A-B) into B-A when we honor signed zeros. 7437if (!
Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
7441// fold (fneg (fsub 0, Y)) -> Y 7448// fold (fneg (fsub X, Y)) -> (fsub Y, X) 7456// fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) 7459 getNegatedExpression(
X, DAG, LegalOps, OptForSize, CostX,
Depth);
7460// Prevent this node from being deleted by the next call. 7462 Handles.emplace_back(NegX);
7464// fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y)) 7467 getNegatedExpression(
Y, DAG, LegalOps, OptForSize, CostY,
Depth);
7469// We're done with the handles. 7472// Negate the X if its cost is less or equal than Y. 7473if (NegX && (CostX <= CostY)) {
7477 RemoveDeadNode(NegY);
7481// Ignore X * 2.0 because that is expected to be canonicalized to X + X. 7483if (
C->isExactlyValue(2.0) &&
Op.getOpcode() ==
ISD::FMUL)
7486// Negate the Y if it is not expensive. 7491 RemoveDeadNode(NegX);
7498if (!
Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
7504 getNegatedExpression(Z, DAG, LegalOps, OptForSize, CostZ,
Depth);
7505// Give up if fail to negate the Z. 7509// Prevent this node from being deleted by the next two calls. 7510 Handles.emplace_back(NegZ);
7512// fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z)) 7515 getNegatedExpression(
X, DAG, LegalOps, OptForSize, CostX,
Depth);
7516// Prevent this node from being deleted by the next call. 7518 Handles.emplace_back(NegX);
7520// fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z)) 7523 getNegatedExpression(
Y, DAG, LegalOps, OptForSize, CostY,
Depth);
7525// We're done with the handles. 7528// Negate the X if its cost is less or equal than Y. 7529if (NegX && (CostX <= CostY)) {
7530Cost = std::min(CostX, CostZ);
7533 RemoveDeadNode(NegY);
7537// Negate the Y if it is not expensive. 7539Cost = std::min(CostY, CostZ);
7542 RemoveDeadNode(NegX);
7550if (
SDValue NegV = getNegatedExpression(
Op.getOperand(0), DAG, LegalOps,
7555if (
SDValue NegV = getNegatedExpression(
Op.getOperand(0), DAG, LegalOps,
7561// fold (fneg (select C, LHS, RHS)) -> (select C, (fneg LHS), (fneg RHS)) 7562// iff at least one cost is cheaper and the other is neutral/cheaper 7566 getNegatedExpression(
LHS, DAG, LegalOps, OptForSize, CostLHS,
Depth);
7568 RemoveDeadNode(NegLHS);
7572// Prevent this node from being deleted by the next call. 7573 Handles.emplace_back(NegLHS);
7578 getNegatedExpression(
RHS, DAG, LegalOps, OptForSize, CostRHS,
Depth);
7580// We're done with the handles. 7586 RemoveDeadNode(NegLHS);
7587 RemoveDeadNode(NegRHS);
7591Cost = std::min(CostLHS, CostRHS);
7592return DAG.
getSelect(
DL, VT,
Op.getOperand(0), NegLHS, NegRHS);
7599//===----------------------------------------------------------------------===// 7600// Legalization Utilities 7601//===----------------------------------------------------------------------===// 7621if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
7627// LL, LH, RL, and RH must be either all NULL or all set to a value. 7634if ((
Signed && HasSMUL_LOHI) || (!
Signed && HasUMUL_LOHI)) {
7661// The inputs are both zero-extended. 7662if (MakeMUL_LOHI(LL, RL,
Lo,
Hi,
false)) {
7663 Result.push_back(
Lo);
7664 Result.push_back(
Hi);
7667 Result.push_back(Zero);
7668 Result.push_back(Zero);
7677// The input values are both sign-extended. 7678// TODO non-MUL case? 7679if (MakeMUL_LOHI(LL, RL,
Lo,
Hi,
true)) {
7680 Result.push_back(
Lo);
7681 Result.push_back(
Hi);
7686unsigned ShiftAmount = OuterBitSize - InnerBitSize;
7701if (!MakeMUL_LOHI(LL, RL,
Lo,
Hi,
false))
7704 Result.push_back(
Lo);
7711 Result.push_back(
Hi);
7715// Compute the full width result. 7724if (!MakeMUL_LOHI(LL, RH,
Lo,
Hi,
false))
7727// This is effectively the add part of a multiply-add of half-sized operands, 7728// so it cannot overflow. 7731if (!MakeMUL_LOHI(LH, RL,
Lo,
Hi,
false))
7783bool Ok = expandMUL_LOHI(
N->getOpcode(),
N->getValueType(0),
SDLoc(
N),
7784N->getOperand(0),
N->getOperand(1), Result, HiLoVT,
7785 DAG, Kind, LL, LH, RL, RH);
7787assert(Result.size() == 2);
7794// Optimize unsigned division or remainder by constants for types twice as large 7797// If (1 << (BitWidth / 2)) % Constant == 1, then the remainder 7800// Sum += __builtin_uadd_overflow(Lo, High, &Sum); 7801// Remainder = Sum % Constant 7802// This is based on "Remainder by Summing Digits" from Hacker's Delight. 7804// For division, we can compute the remainder using the algorithm described 7805// above, subtract it from the dividend to get an exact multiple of Constant. 7806// Then multiply that exact multiply by the multiplicative inverse modulo 7807// (1 << (BitWidth / 2)) to get the quotient. 7809// If Constant is even, we can shift right the dividend and the divisor by the 7810// number of trailing zeros in Constant before applying the remainder algorithm. 7811// If we're after the quotient, we can subtract this value from the shifted 7812// dividend and multiply by the multiplicative inverse of the shifted divisor. 7813// If we want the remainder, we shift the value left by the number of trailing 7814// zeros and add the bits that were shifted out of the dividend. 7819unsigned Opcode =
N->getOpcode();
7820EVT VT =
N->getValueType(0);
7822// TODO: Support signed division/remainder. 7827"Unexpected opcode");
7829auto *CN = dyn_cast<ConstantSDNode>(
N->getOperand(1));
7833APInt Divisor = CN->getAPIntValue();
7839// Divisor needs to less than (1 << HBitWidth). 7841if (Divisor.
uge(HalfMaxPlus1))
7844// We depend on the UREM by constant optimization in DAGCombiner that requires 7850// Don't expand if optimizing for size. 7854// Early out for 0 or 1 divisors. 7858// If the divisor is even, shift it until it becomes odd. 7859unsigned TrailingZeros = 0;
7869// If (1 << HBitWidth) % divisor == 1, we can add the two halves together and 7870// then add in the carry. 7871// TODO: If we can't split it in half, we might be able to split into 3 or 7872// more pieces using a smaller bit width. 7873if (HalfMaxPlus1.
urem(Divisor).
isOne()) {
7874assert(!LL == !LH &&
"Expected both input halves or no input halves!");
7876 std::tie(LL, LH) = DAG.
SplitScalar(
N->getOperand(0), dl, HiLoVT, HiLoVT);
7878// Shift the input by the number of TrailingZeros in the divisor. The 7879// shifted out bits will be added to the remainder later. 7881// Save the shifted off bits if we need the remainder. 7899// Use uaddo_carry if we can, otherwise use a compare to detect overflow. 7910// If the boolean for the target is 0 or 1, we can add the setcc result 7922// If we didn't find a sum, we can't do the expansion. 7926// Perform a HiLoVT urem on the Sum using truncated divisor. 7933// Subtract the remainder from the shifted dividend. 7939// Multiply by the multiplicative inverse of the divisor modulo 7946// Split the quotient into low and high parts. 7948 std::tie(QuotL, QuotH) = DAG.
SplitScalar(Quotient, dl, HiLoVT, HiLoVT);
7949 Result.push_back(QuotL);
7950 Result.push_back(QuotH);
7954// If we shifted the input, shift the remainder left and add the bits we 7955// shifted off the input. 7962 Result.push_back(RemL);
7969// Check that (every element of) Z is undef or not an exact multiple of BW. 7988bool IsFSHL =
Node->getOpcode() == ISD::VP_FSHL;
7991EVT ShVT = Z.getValueType();
7993// fshl: X << C | Y >> (BW - C) 7994// fshr: X << (BW - C) | Y >> C 7995// where C = Z % BW is not zero 7997 ShAmt = DAG.
getNode(ISD::VP_UREM,
DL, ShVT, Z, BitWidthC, Mask, VL);
7998 InvShAmt = DAG.
getNode(ISD::VP_SUB,
DL, ShVT, BitWidthC, ShAmt, Mask, VL);
7999 ShX = DAG.
getNode(ISD::VP_SHL,
DL, VT,
X, IsFSHL ? ShAmt : InvShAmt, Mask,
8001 ShY = DAG.
getNode(ISD::VP_SRL,
DL, VT,
Y, IsFSHL ? InvShAmt : ShAmt, Mask,
8004// fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW)) 8005// fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW) 8008// Z % BW -> Z & (BW - 1) 8009 ShAmt = DAG.
getNode(ISD::VP_AND,
DL, ShVT, Z, BitMask, Mask, VL);
8010// (BW - 1) - (Z % BW) -> ~Z & (BW - 1) 8013 InvShAmt = DAG.
getNode(ISD::VP_AND,
DL, ShVT, NotZ, BitMask, Mask, VL);
8016 ShAmt = DAG.
getNode(ISD::VP_UREM,
DL, ShVT, Z, BitWidthC, Mask, VL);
8017 InvShAmt = DAG.
getNode(ISD::VP_SUB,
DL, ShVT, BitMask, ShAmt, Mask, VL);
8022 ShX = DAG.
getNode(ISD::VP_SHL,
DL, VT,
X, ShAmt, Mask, VL);
8024 ShY = DAG.
getNode(ISD::VP_SRL,
DL, VT, ShY1, InvShAmt, Mask, VL);
8027 ShX = DAG.
getNode(ISD::VP_SHL,
DL, VT, ShX1, InvShAmt, Mask, VL);
8028 ShY = DAG.
getNode(ISD::VP_SRL,
DL, VT,
Y, ShAmt, Mask, VL);
8031return DAG.
getNode(ISD::VP_OR,
DL, VT, ShX, ShY, Mask, VL);
8036if (Node->isVPOpcode())
8039EVT VT = Node->getValueType(0);
8049SDValue Z = Node->getOperand(2);
8052bool IsFSHL = Node->getOpcode() ==
ISD::FSHL;
8055EVT ShVT = Z.getValueType();
8057// If a funnel shift in the other direction is more supported, use it. 8062// fshl X, Y, Z -> fshr X, Y, -Z 8063// fshr X, Y, Z -> fshl X, Y, -Z 8067// fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z 8068// fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z 8085// fshl: X << C | Y >> (BW - C) 8086// fshr: X << (BW - C) | Y >> C 8087// where C = Z % BW is not zero 8094// fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW)) 8095// fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW) 8098// Z % BW -> Z & (BW - 1) 8100// (BW - 1) - (Z % BW) -> ~Z & (BW - 1) 8122// TODO: Merge with expandFunnelShift. 8125EVT VT = Node->getValueType(0);
8127bool IsLeft = Node->getOpcode() ==
ISD::ROTL;
8128SDValue Op0 = Node->getOperand(0);
8129SDValue Op1 = Node->getOperand(1);
8135// If a rotate in the other direction is more supported, use it. 8140return DAG.
getNode(RevRot,
DL, VT, Op0, Sub);
8143if (!AllowVectorOps && VT.
isVector() &&
8157// (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1)) 8158// (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1)) 8161 ShVal = DAG.
getNode(ShOpc,
DL, VT, Op0, ShAmt);
8163 HsVal = DAG.
getNode(HsOpc,
DL, VT, Op0, HsAmt);
8165// (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w)) 8166// (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w)) 8169 ShVal = DAG.
getNode(ShOpc,
DL, VT, Op0, ShAmt);
8180assert(Node->getNumOperands() == 3 &&
"Not a double-shift!");
8181EVT VT = Node->getValueType(0);
8187SDValue ShOpLo = Node->getOperand(0);
8188SDValue ShOpHi = Node->getOperand(1);
8189SDValue ShAmt = Node->getOperand(2);
8195// ISD::FSHL and ISD::FSHR have defined overflow behavior but ISD::SHL and 8196// ISD::SRA/L nodes haven't. Insert an AND to be safe, it's usually optimized 8213// If the shift amount is larger or equal than the width of a part we don't 8214// use the result from the FSHL/FSHR. Insert a test and select the appropriate 8215// values for large shift amounts. 8232unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
8233SDValue Src = Node->getOperand(OpNo);
8234EVT SrcVT = Src.getValueType();
8235EVT DstVT = Node->getValueType(0);
8238// FIXME: Only f32 to i64 conversions are supported. 8239if (SrcVT != MVT::f32 || DstVT != MVT::i64)
8242if (Node->isStrictFPOpcode())
8243// When a NaN is converted to an integer a trap is allowed. We can't 8244// use this expansion here because it would eliminate that trap. Other 8245// traps are also allowed and cannot be eliminated. See 8246// IEEE 754-2008 sec 5.8. 8249// Expand f32 -> i64 conversion 8250// This algorithm comes from compiler-rt's implementation of fixsfdi: 8251// https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c 8305unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
8306SDValue Src = Node->getOperand(OpNo);
8308EVT SrcVT = Src.getValueType();
8309EVT DstVT = Node->getValueType(0);
8315// Only expand vector types if we have the appropriate vector bit operations. 8322// If the maximum float value is smaller then the signed integer range, 8323// the destination signmask can't be represented by the float, so we can 8324// just use FP_TO_SINT directly. 8330if (Node->isStrictFPOpcode()) {
8332 { Node->getOperand(0), Src });
8333 Chain = Result.getValue(1);
8339// Don't expand it if there isn't cheap fsub instruction. 8347if (Node->isStrictFPOpcode()) {
8349 Node->getOperand(0),
/*IsSignaling*/true);
8355bool Strict = Node->isStrictFPOpcode() ||
8359// Expand based on maximum range of FP_TO_SINT, if the value exceeds the 8360// signmask then offset (the result of which should be fully representable). 8361// Sel = Src < 0x8000000000000000 8362// FltOfs = select Sel, 0, 0x8000000000000000 8363// IntOfs = select Sel, 0, 0x8000000000000000 8364// Result = fp_to_sint(Src - FltOfs) ^ IntOfs 8366// TODO: Should any fast-math-flags be set for the FSUB? 8374if (Node->isStrictFPOpcode()) {
8376 { Chain, Src, FltOfs });
8386// Expand based on maximum range of FP_TO_SINT: 8387// True = fp_to_sint(Src) 8388// False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000) 8389// Result = select (Src < 0x8000000000000000), True, False 8392// TODO: Should any fast-math-flags be set for the FSUB? 8398 Result = DAG.
getSelect(dl, DstVT, Sel, True, False);
8405// This transform is not correct for converting 0 when rounding mode is set 8406// to round toward negative infinity which will produce -0.0. So disable 8408if (Node->isStrictFPOpcode())
8411SDValue Src = Node->getOperand(0);
8412EVT SrcVT = Src.getValueType();
8413EVT DstVT = Node->getValueType(0);
8415// If the input is known to be non-negative and SINT_TO_FP is legal then use 8417if (Node->getFlags().hasNonNeg() &&
8427// Only expand vector types if we have the appropriate vector bit 8438// Implementation of unsigned i64 to f64 following the algorithm in 8439// __floatundidf in compiler_rt. This implementation performs rounding 8440// correctly in all rounding modes with the exception of converting 0 8441// when rounding toward negative infinity. In that case the fsub will 8442// produce -0.0. This will be added to +0.0 and produce -0.0 which is 8446 llvm::bit_cast<double>(UINT64_C(0x4530000000100000)), dl, DstVT);
8465unsigned Opcode = Node->getOpcode();
8470if (Node->getFlags().hasNoNaNs()) {
8472EVT VT = Node->getValueType(0);
8477SDValue Op1 = Node->getOperand(0);
8478SDValue Op2 = Node->getOperand(1);
8480// Copy FMF flags, but always set the no-signed-zeros flag 8481// as this is implied by the FMINNUM/FMAXNUM semantics. 8491if (
SDValue Expanded = expandVectorNaryOpBySplitting(Node, DAG))
8494EVT VT = Node->getValueType(0);
8497"Expanding fminnum/fmaxnum for scalable vectors is undefined.");
8504SDValue Quiet0 = Node->getOperand(0);
8505SDValue Quiet1 = Node->getOperand(1);
8507if (!Node->getFlags().hasNoNaNs()) {
8508// Insert canonicalizes if it's possible we need to quiet to get correct 8520return DAG.
getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags());
8523// If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that 8524// instead if there are no NaNs and there can't be an incompatible zero 8525// compare: at least one operand isn't +/-0, or there are no signed-zeros. 8526if ((Node->getFlags().hasNoNaNs() ||
8529 (Node->getFlags().hasNoSignedZeros() ||
8532unsigned IEEE2018Op =
8535return DAG.
getNode(IEEE2018Op, dl, VT, Node->getOperand(0),
8536 Node->getOperand(1), Node->getFlags());
8539if (
SDValue SelCC = createSelectForFMINNUM_FMAXNUM(Node, DAG))
8547if (
SDValue Expanded = expandVectorNaryOpBySplitting(
N, DAG))
8553unsigned Opc =
N->getOpcode();
8554EVT VT =
N->getValueType(0);
8559// First, implement comparison not propagating NaN. If no native fmin or fmax 8560// available, use plain select with setcc instead. 8565// FIXME: We should probably define fminnum/fmaxnum variants with correct 8566// signed zero behavior. 8567bool MinMaxMustRespectOrderedZero =
false;
8571 MinMaxMustRespectOrderedZero =
true;
8578// NaN (if exists) will be propagated later, so orderness doesn't matter. 8584// Propagate any NaN of both operands 8585if (!
N->getFlags().hasNoNaNs() &&
8593// fminimum/fmaximum requires -0.0 less than +0.0 8594if (!MinMaxMustRespectOrderedZero && !
N->getFlags().hasNoSignedZeros() &&
8617unsigned Opc = Node->getOpcode();
8618EVT VT = Node->getValueType(0);
8628if (!Flags.hasNoNaNs()) {
8629// Insert canonicalizes if it's possible we need to quiet to get correct 8642// We can use FMINIMUM/FMAXIMUM if there is no NaN, since it has 8643// same behaviors for all of other cases: +0.0 vs -0.0 included. 8644if (Flags.hasNoNaNs() ||
8646unsigned IEEE2019Op =
8652// FMINNUM/FMAXMUM returns qNaN if either operand is sNaN, and it may return 8653// either one for +0.0 vs -0.0. 8654if ((Flags.hasNoNaNs() ||
8666// If only one operand is NaN, override it with another operand. 8676// If MinMax is NaN, let's quiet it. 8682// Fixup signed zero behavior. 8683if (
Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros() ||
8700/// Returns a true value if if this FPClassTest can be performed with an ordered 8701/// fcmp to 0, and a false value if it's an unordered fcmp to 0. Returns 8702/// std::nullopt if it cannot be performed as a compare with 0. 8708bool IsOrdered = NanTest ==
fcNone;
8709bool IsUnordered = NanTest ==
fcNan;
8711// Skip cases that are testing for only a qnan or snan. 8712if (!IsOrdered && !IsUnordered)
8715if (OrderedMask ==
fcZero &&
8728EVT OperandVT =
Op.getValueType();
8732// Degenerated cases. 8738// PPC double double is a pair of doubles, of which the higher part determines 8740if (OperandVT == MVT::ppcf128) {
8743 OperandVT = MVT::f64;
8746// Floating-point type properties. 8750bool IsF80 = (ScalarFloatVT == MVT::f80);
8752// Some checks can be implemented using float comparisons, if floating point 8753// exceptions are ignored. 8754if (Flags.hasNoFPExcept() &&
8757bool IsInvertedFP =
false;
8761 FPTestMask = InvertedFPCheck;
8768// See if we can fold an | fcNan into an unordered compare. 8769FPClassTest OrderedFPTestMask = FPTestMask & ~fcNan;
8771// Can't fold the ordered check if we're only testing for snan or qnan 8774 OrderedFPTestMask = FPTestMask;
8776constbool IsOrdered = FPTestMask == OrderedFPTestMask;
8778if (std::optional<bool> IsCmp0 =
8781 *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode,
8784// If denormals could be implicitly treated as 0, this is not equivalent 8785// to a compare with 0 since it will also be true for denormals. 8788 *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode);
8791if (FPTestMask ==
fcNan &&
8797bool IsOrderedInf = FPTestMask ==
fcInf;
8800 : UnorderedCmpOpcode,
8806// isinf(x) --> fabs(x) == inf 8811 IsOrderedInf ? OrderedCmpOpcode : UnorderedCmpOpcode);
8816 : UnorderedCmpOpcode,
8818// isposinf(x) --> x == inf 8819// isneginf(x) --> x == -inf 8820// isposinf(x) || nan --> x u== inf 8821// isneginf(x) || nan --> x u== -inf 8827 IsOrdered ? OrderedCmpOpcode : UnorderedCmpOpcode);
8831// TODO: Could handle ordered case, but it produces worse code for 8832// x86. Maybe handle ordered if fabs is free? 8839// (issubnormal(x) || iszero(x)) --> fabs(x) < smallest_normal 8841// TODO: Maybe only makes sense if fabs is free. Integer test of 8842// exponent bits seems better for x86. 8846return DAG.
getSetCC(
DL, ResultVT, Abs, SmallestNormal,
8847 IsOrdered ? OrderedOp : UnorderedOp);
8852// TODO: Handle unordered 8861// isnormal(x) --> fabs(x) < infinity && !(fabs(x) < smallest_normal) 8870 DAG.
getSetCC(
DL, ResultVT, Abs, SmallestNormal, IsNormalOp);
8872return DAG.
getNode(LogicOp,
DL, ResultVT, IsFinite, IsNormal);
8877// Some checks may be represented as inversion of simpler check, for example 8878// "inf|normal|subnormal|zero" => !"nan". 8879bool IsInverted =
false;
8882Test = InvertedCheck;
8886// In the general case use integer operations. 8898constunsigned ExplicitIntBitInF80 = 63;
8901 ExpMask.
clearBit(ExplicitIntBitInF80);
8915constauto appendResult = [&](
SDValue PartialRes) {
8924SDValue IntBitIsSetV;
// Explicit integer bit in f80 mantissa is set. 8925constauto getIntBitIsSet = [&]() ->
SDValue {
8927APInt IntBitMask(BitSize, 0);
8928 IntBitMask.
setBit(ExplicitIntBitInF80);
8936// Split the value into sign bit and absolute value. 8941// Tests that involve more than one class should be processed first. 8945 ;
// Detect finite numbers of f80 by checking individual classes because 8946// they have different settings of the explicit integer bit. 8948// finite(V) ==> abs(V) < exp_mask 8952// finite(V) && V > 0 ==> V < exp_mask 8954Test &= ~fcPosFinite;
8956// finite(V) && V < 0 ==> abs(V) < exp_mask && signbit == 1 8959Test &= ~fcNegFinite;
8961 appendResult(PartialRes);
8964// fcZero | fcSubnormal => test all exponent bits are 0 8965// TODO: Handle sign bit specific cases 8970 appendResult(ExpIsZero);
8975// Check for individual classes. 8980elseif (PartialCheck ==
fcZero)
8982else// ISD::fcNegZero 8984 appendResult(PartialRes);
8988// issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set) 8989// issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set) 8997 appendResult(PartialRes);
9000if (
unsigned PartialCheck =
Test &
fcInf) {
9003elseif (PartialCheck ==
fcInf)
9005else {
// ISD::fcNegInf 9010 appendResult(PartialRes);
9013if (
unsigned PartialCheck =
Test &
fcNan) {
9014APInt InfWithQnanBit = Inf | QNaNBitMask;
9016if (PartialCheck ==
fcNan) {
9017// isnan(V) ==> abs(V) > int(inf) 9020// Recognize unsupported values as NaNs for compatibility with glibc. 9021// In them (exp(V)==0) == int_bit. 9029 }
elseif (PartialCheck ==
fcQNan) {
9030// isquiet(V) ==> abs(V) >= (unsigned(Inf) | quiet_bit) 9033 }
else {
// ISD::fcSNan 9034// issignaling(V) ==> abs(V) > unsigned(Inf) && 9035// abs(V) < (unsigned(Inf) | quiet_bit) 9041 appendResult(PartialRes);
9045// isnormal(V) ==> (0 < exp < max_exp) ==> (unsigned(exp-1) < (max_exp-1)) 9046APInt ExpLSB = ExpMask & ~(ExpMask.
shl(1));
9049APInt ExpLimit = ExpMask - ExpLSB;
9062 appendResult(PartialRes);
9072// Only expand vector types if we have the appropriate vector bit operations. 9085EVT VT = Node->getValueType(0);
9091// TODO: Add support for irregular type lengths. 9092if (!(Len <= 128 && Len % 8 == 0))
9095// Only expand vector types if we have the appropriate vector bit operations. 9099// This is the "best" algorithm from 9100// http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel 9108// v = v - ((v >> 1) & 0x55555555...) 9114// v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...) 9120// v = (v + (v >> 4)) & 0x0F0F0F0F... 9130// Avoid the multiply if we only have 2 bytes to add. 9131// TODO: Only doing this for scalars because vectors weren't as obviously 9134// v = (v + (v >> 8)) & 0x00FF; 9142// v = (v * 0x01010101...) >> (Len - 8) 9151for (
unsigned Shift = 8; Shift < Len; Shift *= 2) {
9162EVT VT = Node->getValueType(0);
9165SDValue Mask = Node->getOperand(1);
9166SDValue VL = Node->getOperand(2);
9170// TODO: Add support for irregular type lengths. 9171if (!(Len <= 128 && Len % 8 == 0))
9174// This is same algorithm of expandCTPOP from 9175// http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel 9183SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5;
9185// v = v - ((v >> 1) & 0x55555555...) 9186 Tmp1 = DAG.
getNode(ISD::VP_AND, dl, VT,
9190Op = DAG.
getNode(ISD::VP_SUB, dl, VT,
Op, Tmp1, Mask, VL);
9192// v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...) 9193 Tmp2 = DAG.
getNode(ISD::VP_AND, dl, VT,
Op, Mask33, Mask, VL);
9194 Tmp3 = DAG.
getNode(ISD::VP_AND, dl, VT,
9198Op = DAG.
getNode(ISD::VP_ADD, dl, VT, Tmp2, Tmp3, Mask, VL);
9200// v = (v + (v >> 4)) & 0x0F0F0F0F... 9203 Tmp5 = DAG.
getNode(ISD::VP_ADD, dl, VT,
Op, Tmp4, Mask, VL);
9204Op = DAG.
getNode(ISD::VP_AND, dl, VT, Tmp5, Mask0F, Mask, VL);
9209// v = (v * 0x01010101...) >> (Len - 8) 9215 V = DAG.
getNode(ISD::VP_MUL, dl, VT,
Op, Mask01, Mask, VL);
9218for (
unsigned Shift = 8; Shift < Len; Shift *= 2) {
9220 V = DAG.
getNode(ISD::VP_ADD, dl, VT, V,
9221 DAG.
getNode(ISD::VP_SHL, dl, VT, V, ShiftC, Mask, VL),
9231EVT VT = Node->getValueType(0);
9236// If the non-ZERO_UNDEF version is supported we can use that instead. 9241// If the ZERO_UNDEF version is supported use that and handle the zero case. 9252// Only expand vector types if we have the appropriate vector bit operations. 9253// This includes the operations needed to expand CTPOP if it isn't supported. 9261// for now, we do this: 9266// x = x | (x >>32); // for 64-bit input 9267// return popcount(~x); 9269// Ref: "Hacker's Delight" by Henry Warren 9270for (
unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
9281EVT VT = Node->getValueType(0);
9284SDValue Mask = Node->getOperand(1);
9285SDValue VL = Node->getOperand(2);
9293// x = x | (x >>32); // for 64-bit input 9294// return popcount(~x); 9295for (
unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
9298 DAG.
getNode(ISD::VP_SRL, dl, VT,
Op, Tmp, Mask, VL), Mask,
9303return DAG.
getNode(ISD::VP_CTPOP, dl, VT,
Op, Mask, VL);
9312 :
APInt(64, 0x0218A392CD3D5DBFULL);
9326for (
unsigned i = 0; i <
BitWidth; i++) {
9332// Create a ConstantArray in Constant Pool 9352EVT VT = Node->getValueType(0);
9356// If the non-ZERO_UNDEF version is supported we can use that instead. 9361// If the ZERO_UNDEF version is supported use that and handle the zero case. 9372// Only expand vector types if we have the appropriate vector bit operations. 9373// This includes the operations needed to expand CTPOP if it isn't supported. 9383// Emit Table Lookup if ISD::CTLZ and ISD::CTPOP are not legal. 9386if (
SDValue V = CTTZTableLookup(Node, DAG, dl, VT,
Op, NumBitsPerElt))
9389// for now, we use: { return popcount(~x & (x - 1)); } 9390// unless the target has ctlz but not ctpop, in which case we use: 9391// { return 32 - nlz(~x & (x-1)); } 9392// Ref: "Hacker's Delight" by Henry Warren 9397// If ISD::CTLZ is legal and CTPOP isn't, then do that instead. 9408SDValue Mask = Node->getOperand(1);
9409SDValue VL = Node->getOperand(2);
9411EVT VT = Node->getValueType(0);
9413// Same as the vector part of expandCTTZ, use: popcount(~x & (x - 1)) 9418SDValue Tmp = DAG.
getNode(ISD::VP_AND, dl, VT, Not, MinusOne, Mask, VL);
9419return DAG.
getNode(ISD::VP_CTPOP, dl, VT, Tmp, Mask, VL);
9424// %cond = to_bool_vec %source 9425// %splat = splat /*val=*/VL 9427// %v = vp.select %cond, /*true=*/tz, /*false=*/%splat 9428// %r = vp.reduce.umin %v 9433EVT SrcVT = Source.getValueType();
9434EVT ResVT =
N->getValueType(0);
9438// Convert to boolean vector. 9443 Source = DAG.
getNode(ISD::VP_SETCC,
DL, SrcVT, Source, AllZero,
9451 DAG.
getNode(ISD::VP_SELECT,
DL, ResVecVT, Source, StepVec,
Splat, EVL);
9452return DAG.
getNode(ISD::VP_REDUCE_UMIN,
DL, ResVT, ExtEVL,
Select, Mask, EVL);
9459EVT MaskVT = Mask.getValueType();
9462// Find a suitable type for a stepvector. 9463ConstantRange VScaleRange(1,
/*isFullSet=*/true);
// Fixed length default. 9469/*ZeroIsPoison=*/true, &VScaleRange);
9473// If promotion is required to make the type legal, do it here; promotion 9474// of integers within LegalizeVectorOps is looking for types of the same 9475// size but with a smaller number of larger elements, not the usual larger 9476// size with the same number of larger elements. 9483// Zero out lanes with inactive elements, then find the highest remaining 9484// value from the stepvector. 9493bool IsNegative)
const{
9495EVT VT =
N->getValueType(0);
9498// abs(x) -> smax(x,sub(0,x)) 9507// abs(x) -> umin(x,sub(0,x)) 9516// 0 - abs(x) -> smin(x, sub(0,x)) 9525// Only expand vector types if we have the appropriate vector operations. 9539// abs(x) -> Y = sra (X, size(X)-1); sub (xor (X, Y), Y) 9543// 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y)) 9549EVT VT =
N->getValueType(0);
9554// abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs)) 9555// abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs)) 9564// abdu(lhs, rhs) -> or(usubsat(lhs,rhs), usubsat(rhs,lhs)) 9570// If the subtract doesn't overflow then just use abs(sub()) 9571// NOTE: don't use frozen operands for value tracking. 9589// Branchless expansion iff cmp result is allbits: 9590// abds(lhs, rhs) -> sub(sgt(lhs, rhs), xor(sgt(lhs, rhs), sub(lhs, rhs))) 9591// abdu(lhs, rhs) -> sub(ugt(lhs, rhs), xor(ugt(lhs, rhs), sub(lhs, rhs))) 9598// Similar to the branchless expansion, use the (sign-extended) usubo overflow 9599// flag if the (scalar) type is illegal as this is more likely to legalize 9601// abdu(lhs, rhs) -> sub(xor(sub(lhs, rhs), uof(lhs, rhs)), uof(lhs, rhs)) 9610// FIXME: Should really try to split the vector in case it's legal on a 9615// abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs)) 9616// abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs)) 9623EVT VT =
N->getValueType(0);
9627unsigned Opc =
N->getOpcode();
9638// If the operands are already extended, we can add+shift. 9648return DAG.
getNode(ShiftOpc, dl, VT, Sum,
9652// For scalars, see if we can efficiently extend/truncate to use add+shift. 9663// Just use SRL as we will be truncating away the extended sign bits. 9670// avgflooru(lhs, rhs) -> or(lshr(add(lhs, rhs),1),shl(overflow, typesize-1)) 9678// Right shift the sum by 1 9690// avgceils(lhs, rhs) -> sub(or(lhs,rhs),ashr(xor(lhs,rhs),1)) 9691// avgceilu(lhs, rhs) -> sub(or(lhs,rhs),lshr(xor(lhs,rhs),1)) 9692// avgfloors(lhs, rhs) -> add(and(lhs,rhs),ashr(xor(lhs,rhs),1)) 9693// avgflooru(lhs, rhs) -> add(and(lhs,rhs),lshr(xor(lhs,rhs),1)) 9700return DAG.
getNode(SumOpc, dl, VT, Sign, Shift);
9705EVT VT =
N->getValueType(0);
9712SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
9717// Use a rotate by 8. This can be further expanded if necessary. 9763EVT VT =
N->getValueType(0);
9772SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
9781return DAG.
getNode(ISD::VP_OR, dl, VT, Tmp1, Tmp2, Mask, EVL);
9791 Tmp2 = DAG.
getNode(ISD::VP_AND, dl, VT, Tmp2,
9795 Tmp4 = DAG.
getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
9796 Tmp2 = DAG.
getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
9797return DAG.
getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
9801 Tmp7 = DAG.
getNode(ISD::VP_AND, dl, VT,
Op,
9805 Tmp6 = DAG.
getNode(ISD::VP_AND, dl, VT,
Op,
9806 DAG.
getConstant(255ULL << 16, dl, VT), Mask, EVL);
9809 Tmp5 = DAG.
getNode(ISD::VP_AND, dl, VT,
Op,
9810 DAG.
getConstant(255ULL << 24, dl, VT), Mask, EVL);
9815 Tmp4 = DAG.
getNode(ISD::VP_AND, dl, VT, Tmp4,
9816 DAG.
getConstant(255ULL << 24, dl, VT), Mask, EVL);
9819 Tmp3 = DAG.
getNode(ISD::VP_AND, dl, VT, Tmp3,
9820 DAG.
getConstant(255ULL << 16, dl, VT), Mask, EVL);
9823 Tmp2 = DAG.
getNode(ISD::VP_AND, dl, VT, Tmp2,
9827 Tmp8 = DAG.
getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp7, Mask, EVL);
9828 Tmp6 = DAG.
getNode(ISD::VP_OR, dl, VT, Tmp6, Tmp5, Mask, EVL);
9829 Tmp4 = DAG.
getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
9830 Tmp2 = DAG.
getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
9831 Tmp8 = DAG.
getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp6, Mask, EVL);
9832 Tmp4 = DAG.
getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
9833return DAG.
getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp4, Mask, EVL);
9839EVT VT =
N->getValueType(0);
9846// If we can, perform BSWAP first and then the mask+swap the i4, then i2 9847// and finally the i1 pairs. 9848// TODO: We can easily support i4/i2 legal types if any target ever does. 9850// Create the masks - repeating the pattern every byte. 9855// BSWAP if the type is wider than a single byte. 9858// swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4) 9865// swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2) 9872// swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1) 9882for (
unsignedI = 0, J = Sz-1;
I < Sz; ++
I, --J) {
9899assert(
N->getOpcode() == ISD::VP_BITREVERSE);
9902EVT VT =
N->getValueType(0);
9911// If we can, perform BSWAP first and then the mask+swap the i4, then i2 9912// and finally the i1 pairs. 9913// TODO: We can easily support i4/i2 legal types if any target ever does. 9915// Create the masks - repeating the pattern every byte. 9920// BSWAP if the type is wider than a single byte. 9921 Tmp = (Sz > 8 ? DAG.
getNode(ISD::VP_BSWAP, dl, VT,
Op, Mask, EVL) :
Op);
9923// swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4) 9926 Tmp2 = DAG.
getNode(ISD::VP_AND, dl, VT, Tmp2,
9932 Tmp = DAG.
getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
9934// swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2) 9937 Tmp2 = DAG.
getNode(ISD::VP_AND, dl, VT, Tmp2,
9943 Tmp = DAG.
getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
9945// swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1) 9948 Tmp2 = DAG.
getNode(ISD::VP_AND, dl, VT, Tmp2,
9954 Tmp = DAG.
getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
9960std::pair<SDValue, SDValue>
9964SDValue Chain = LD->getChain();
9965SDValue BasePTR = LD->getBasePtr();
9966EVT SrcVT = LD->getMemoryVT();
9967EVT DstVT = LD->getValueType(0);
9978// A vector must always be stored in memory as-is, i.e. without any padding 9979// between the elements, since various code depend on it, e.g. in the 9980// handling of a bitcast of a vector type to int, which may be done with a 9981// vector store followed by an integer load. A vector that does not have 9982// elements that are byte-sized must therefore be stored as an integer 9983// built out of the extracted vector elements. 9995// Load the whole vector and avoid masking off the top bits as it makes 9996// the codegen worse. 9999 LD->getPointerInfo(), SrcIntVT, LD->getOriginalAlign(),
10000 LD->getMemOperand()->getFlags(), LD->getAAInfo());
10003for (
unsignedIdx = 0;
Idx < NumElem; ++
Idx) {
10004unsigned ShiftIntoIdx =
10015 Scalar = DAG.
getNode(ExtendOp, SL, DstEltVT, Scalar);
10022return std::make_pair(
Value, Load.getValue(1));
10031for (
unsignedIdx = 0;
Idx < NumElem; ++
Idx) {
10033 DAG.
getExtLoad(ExtType, SL, DstEltVT, Chain, BasePTR,
10034 LD->getPointerInfo().getWithOffset(
Idx * Stride),
10035 SrcEltVT, LD->getOriginalAlign(),
10036 LD->getMemOperand()->getFlags(), LD->getAAInfo());
10047return std::make_pair(
Value, NewChain);
10054SDValue Chain = ST->getChain();
10055SDValue BasePtr = ST->getBasePtr();
10057EVT StVT = ST->getMemoryVT();
10062// The type of the data we want to save 10066// The type of data as saved in memory. 10071// A vector must always be stored in memory as-is, i.e. without any padding 10072// between the elements, since various code depend on it, e.g. in the 10073// handling of a bitcast of a vector type to int, which may be done with a 10074// vector store followed by an integer load. A vector that does not have 10075// elements that are byte-sized must therefore be stored as an integer 10076// built out of the extracted vector elements. 10083for (
unsignedIdx = 0;
Idx < NumElem; ++
Idx) {
10088unsigned ShiftIntoIdx =
10097return DAG.
getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(),
10098 ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
10102// Store Stride in bytes 10104assert(Stride &&
"Zero stride!");
10105// Extract each of the elements from the original vector and save them into 10106// memory individually. 10108for (
unsignedIdx = 0;
Idx < NumElem; ++
Idx) {
10115// This scalar TruncStore may be illegal, but we legalize it later. 10117 Chain, SL, Elt,
Ptr, ST->getPointerInfo().getWithOffset(
Idx * Stride),
10118 MemSclVT, ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
10127std::pair<SDValue, SDValue>
10130"unaligned indexed loads not implemented!");
10131SDValue Chain = LD->getChain();
10133EVT VT = LD->getValueType(0);
10134EVT LoadedVT = LD->getMemoryVT();
10143// Scalarize the load and let the individual components be handled. 10144return scalarizeVectorLoad(LD, DAG);
10147// Expand to a (misaligned) integer load of the same size, 10148// then bitconvert to floating point or vector. 10150 LD->getMemOperand());
10156return std::make_pair(Result, newLoad.
getValue(1));
10159// Copy the value to a (aligned) stack slot using (unaligned) integer 10160// loads and stores, then do a (aligned) load from the stack slot. 10164unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
10166// Make sure the stack slot is also aligned for the register type. 10168auto FrameIndex = cast<FrameIndexSDNode>(StackBase.
getNode())->getIndex();
10173EVT PtrVT =
Ptr.getValueType();
10174EVT StackPtrVT = StackPtr.getValueType();
10179// Do all but one copies using the full register width. 10180for (
unsigned i = 1; i < NumRegs; i++) {
10181// Load one integer register's worth from the original location. 10183 RegVT, dl, Chain,
Ptr, LD->getPointerInfo().getWithOffset(
Offset),
10184 LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
10186// Follow the load with a store to the stack slot. Remember the store. 10188 Load.getValue(1), dl, Load, StackPtr,
10190// Increment the pointers. 10197// The last copy may be partial. Do an extending load. 10199 8 * (LoadedBytes -
Offset));
10202 LD->getPointerInfo().getWithOffset(
Offset), MemVT,
10203 LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
10205// Follow the load with a store to the stack slot. Remember the store. 10206// On big-endian machines this requires a truncating store to ensure 10207// that the bits end up in the right place. 10209 Load.getValue(1), dl, Load, StackPtr,
10212// The order of the stores doesn't matter - say it with a TokenFactor. 10215// Finally, perform the original load only redirected to the stack slot. 10216 Load = DAG.
getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
10220// Callers expect a MERGE_VALUES node. 10221return std::make_pair(Load, TF);
10225"Unaligned load of unsupported type.");
10227// Compute the new VT that is half the size of the old one. This is an 10234Align Alignment = LD->getOriginalAlign();
10235unsigned IncrementSize = NumBits / 8;
10238// If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD. 10242// Load the value in two parts 10246 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
10251 LD->getPointerInfo().getWithOffset(IncrementSize),
10252 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
10255Hi = DAG.
getExtLoad(HiExtType, dl, VT, Chain,
Ptr, LD->getPointerInfo(),
10256 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
10261 LD->getPointerInfo().getWithOffset(IncrementSize),
10262 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
10266// aggregate the two parts 10274return std::make_pair(Result, TF);
10280"unaligned indexed stores not implemented!");
10281SDValue Chain = ST->getChain();
10285Align Alignment = ST->getOriginalAlign();
10287EVT StoreMemVT = ST->getMemoryVT();
10295// Scalarize the store and let the individual components be handled. 10299// Expand to a bitconvert of the value to the integer type of the 10300// same size, then a (misaligned) int store. 10301// FIXME: Does not handle truncating floating point stores! 10303 Result = DAG.
getStore(Chain, dl, Result,
Ptr, ST->getPointerInfo(),
10304 Alignment, ST->getMemOperand()->getFlags());
10307// Do a (aligned) store to a stack slot, then copy from the stack slot 10308// to the final destination using (unaligned) integer loads and stores. 10312EVT PtrVT =
Ptr.getValueType();
10315unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
10317// Make sure the stack slot is also aligned for the register type. 10319auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
10321// Perform the original store, only redirected to the stack slot. 10323 Chain, dl, Val, StackPtr,
10326EVT StackPtrVT = StackPtr.getValueType();
10333// Do all but one copies using the full register width. 10334for (
unsigned i = 1; i < NumRegs; i++) {
10335// Load one integer register's worth from the stack slot. 10337 RegVT, dl, Store, StackPtr,
10339// Store it to the final location. Remember the store. 10341 ST->getPointerInfo().getWithOffset(
Offset),
10342 ST->getOriginalAlign(),
10343 ST->getMemOperand()->getFlags()));
10344// Increment the pointers. 10350// The last store may be partial. Do a truncating store. On big-endian 10351// machines this requires an extending load from the stack slot to ensure 10352// that the bits are in the right place. 10356// Load from the stack slot. 10363 ST->getPointerInfo().getWithOffset(
Offset), LoadMemVT,
10364 ST->getOriginalAlign(),
10365 ST->getMemOperand()->getFlags(), ST->getAAInfo()));
10366// The order of the stores doesn't matter - say it with a TokenFactor. 10372"Unaligned store of unknown type.");
10373// Get the half-size VT 10376unsigned IncrementSize = NumBits / 8;
10378// Divide the stored value in two parts. 10382// If Val is a constant, replace the upper bits with 0. The SRL will constant 10383// fold and not use the upper bits. A smaller constant may be easier to 10385if (
auto *
C = dyn_cast<ConstantSDNode>(
Lo);
C && !
C->isOpaque())
10392// Store the two parts 10396Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
10397 ST->getMemOperand()->getFlags());
10402 ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
10403 ST->getMemOperand()->getFlags(), ST->getAAInfo());
10414bool IsCompressedMemory)
const{
10416EVT AddrVT =
Addr.getValueType();
10417EVT MaskVT = Mask.getValueType();
10419"Incompatible types of Data and Mask");
10420if (IsCompressedMemory) {
10423"Cannot currently handle compressed memory with scalable vectors");
10424// Incrementing the pointer according to number of '1's in the mask. 10429 MaskIntVT = MVT::i32;
10432// Count '1's with POPCNT. 10435// Scale is an element size in bytes. 10453"Cannot index a scalable vector within a fixed-width vector");
10457EVT IdxVT =
Idx.getValueType();
10460// If this is a constant index and we know the value plus the number of the 10461// elements in the subvector minus one is less than the minimum number of 10462// elements then it's safe to return Idx. 10463if (
auto *IdxCst = dyn_cast<ConstantSDNode>(
Idx))
10464if (IdxCst->getZExtValue() + (NumSubElts - 1) < NElts)
10478unsigned MaxIndex = NumSubElts < NElts ? NElts - NumSubElts : 0;
10486return getVectorSubVecPointer(
10487 DAG, VecPtr, VecVT,
10497// Make sure the index type is big enough to compute in. 10502// Calculate the element offset and add it to the pointer. 10505"Converting bits to bytes lost precision");
10507"Sub-vector must be a vector with matching element type");
10511EVT IdxVT = Index.getValueType();
10522//===----------------------------------------------------------------------===// 10523// Implementation of Emulated TLS Model 10524//===----------------------------------------------------------------------===// 10528// Access to address of TLS varialbe xyz is lowered to a function call: 10529// __emutls_get_address( address of global variable named "__emutls_v.xyz" ) 10543assert(EmuTlsVar &&
"Cannot find EmuTlsVar ");
10545 Entry.Ty = VoidPtrType;
10546 Args.push_back(Entry);
10553 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
10555// TLSADDR will be codegen'ed as call. Inform MFI that function has calls. 10556// At last for X86 targets, maybe good for other targets too? 10562"Emulated TLS must have zero offset in GlobalAddressSDNode");
10563return CallResult.first;
10574EVT VT =
Op.getOperand(0).getValueType();
10576if (VT.
bitsLT(MVT::i32)) {
10590SDValue Op0 = Node->getOperand(0);
10591SDValue Op1 = Node->getOperand(1);
10594unsigned Opcode = Node->getOpcode();
10597// umax(x,1) --> sub(x,cmpeq(x,0)) iff cmp result is allbits 10606// umin(x,y) -> sub(x,usubsat(x,y)) 10607// TODO: Missing freeze(Op0)? 10614// umax(x,y) -> add(x,usubsat(y,x)) 10615// TODO: Missing freeze(Op0)? 10622// FIXME: Should really try to split the vector in case it's legal on a 10627// Attempt to find an existing SETCC node that we can reuse. 10628// TODO: Do we need a generic doesSETCCNodeExist? 10629// TODO: Missing freeze(Op0)/freeze(Op1)? 10636 {Op0, Op1, DAG.getCondCode(CC)})) {
10643 {Op0, Op1, DAG.getCondCode(CC)})) {
10652// Expand Y = MAX(A, B) -> Y = (A > B) ? A : B 10653// -> Y = (A < B) ? B : A 10654// -> Y = (A >= B) ? A : B 10655// -> Y = (A <= B) ? B : A 10671unsigned Opcode = Node->getOpcode();
10674EVT VT =
LHS.getValueType();
10677assert(VT ==
RHS.getValueType() &&
"Expected operands to be the same type");
10680// usub.sat(a, b) -> umax(a, b) - b 10686// uadd.sat(a, b) -> umin(a, ~b) + b 10693unsigned OverflowOp;
10708llvm_unreachable(
"Expected method to receive signed or unsigned saturation " 10709"addition or subtraction node.");
10712// FIXME: Should really try to split the vector in case it's legal on a 10720SDValue SumDiff = Result.getValue(0);
10721SDValue Overflow = Result.getValue(1);
10727// (LHS + RHS) | OverflowMask 10731// Overflow ? 0xffff.... : (LHS + RHS) 10737// (LHS - RHS) & ~OverflowMask 10742// Overflow ? 0 : (LHS - RHS) 10743return DAG.
getSelect(dl, VT, Overflow, Zero, SumDiff);
10753// If either of the operand signs are known, then they are guaranteed to 10754// only saturate in one direction. If non-negative they will saturate 10755// towards SIGNED_MAX, if negative they will saturate towards SIGNED_MIN. 10757// In the case of ISD::SSUBSAT, 'x - y' is equivalent to 'x + (-y)', so the 10758// sign of 'y' has to be flipped. 10763if (LHSIsNonNegative || RHSIsNonNegative) {
10765return DAG.
getSelect(dl, VT, Overflow, SatMax, SumDiff);
10771if (LHSIsNegative || RHSIsNegative) {
10773return DAG.
getSelect(dl, VT, Overflow, SatMin, SumDiff);
10777// Overflow ? (SumDiff >> BW) ^ MinVal : SumDiff 10783return DAG.
getSelect(dl, VT, Overflow, Result, SumDiff);
10787unsigned Opcode = Node->getOpcode();
10790EVT VT =
LHS.getValueType();
10791EVT ResVT = Node->getValueType(0);
10800// We can't perform arithmetic on i1 values. Extending them would 10801// probably result in worse codegen, so let's just use two selects instead. 10802// Some targets are also just better off using selects rather than subtraction 10803// because one of the conditions can be merged with one of the selects. 10804// And finally, if we don't know the contents of high bits of a boolean value 10805// we can't perform any arithmetic either. 10822unsigned Opcode = Node->getOpcode();
10826EVT VT =
LHS.getValueType();
10831"Expected a SHLSAT opcode");
10832assert(VT ==
RHS.getValueType() &&
"Expected operands to be the same type");
10838// If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate. 10864EVT VT =
LHS.getValueType();
10865assert(
RHS.getValueType() == VT &&
"Mismatching operand types");
10867assert((HiLHS && HiRHS) || (!HiLHS && !HiRHS));
10869"Signed flag should only be set when HiLHS and RiRHS are null");
10871// We'll expand the multiplication by brute force because we have no other 10872// options. This is a trivially-generalized version of the code from 10873// Hacker's Delight (itself derived from Knuth's Algorithm M from section 10874// 4.3.1). If Signed is set, we can use arithmetic right shifts to propagate 10875// sign bits while calculating the Hi half. 10877unsigned HalfBits = Bits / 2;
10886// This is always an unsigned shift. 10908// If HiLHS and HiRHS are set, multiply them by the opposite low part and add 10909// the products to Hi. 10922EVT VT =
LHS.getValueType();
10923assert(
RHS.getValueType() == VT &&
"Mismatching operand types");
// We can fall back to a libcall with an illegal type for the MUL if we
// have a libcall big enough.
if (WideVT == MVT::i16)
  LC = RTLIB::MUL_I16;
else if (WideVT == MVT::i32)
  LC = RTLIB::MUL_I32;
else if (WideVT == MVT::i64)
  LC = RTLIB::MUL_I64;
else if (WideVT == MVT::i128)
  LC = RTLIB::MUL_I128;

// The high part is obtained by SRA'ing all but one of the bits of low
// part.

// Attempt a libcall.
if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) {
  // Halves of WideVT are packed into registers in different order
  // depending on platform endianness. This is usually handled by
  // the C calling convention, but we can't defer to it in the legalizer.
  SDValue Args[] = {LHS, HiLHS, RHS, HiRHS};
  Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
} else {
  SDValue Args[] = {HiLHS, LHS, HiRHS, RHS};
  Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
}
assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
       "Ret value is a collection of constituent nodes holding result.");
if (DAG.getDataLayout().isLittleEndian()) {
  Lo = Ret.getOperand(0);
  Hi = Ret.getOperand(1);
} else {
  Lo = Ret.getOperand(1);
  Hi = Ret.getOperand(0);
}
assert((Opcode == ISD::SMULFIX || Opcode == ISD::UMULFIX ||
        Opcode == ISD::SMULFIXSAT || Opcode == ISD::UMULFIXSAT) &&
       "Expected a fixed point multiplication opcode");

EVT VT = LHS.getValueType();
unsigned Scale = Node->getConstantOperandVal(2);

// [us]mul.fix(a, b, 0) -> mul(a, b)
SDValue Product = Result.getValue(0);
SDValue Overflow = Result.getValue(1);

// Xor the inputs, if resulting sign bit is 0 the product will be
// positive, else negative.
Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax);
return DAG.getSelect(dl, VT, Overflow, Result, Product);

SDValue Product = Result.getValue(0);
SDValue Overflow = Result.getValue(1);
return DAG.getSelect(dl, VT, Overflow, SatMax, Product);

assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
       "Expected scale to be less than the number of bits if signed or at "
       "most the number of bits if unsigned.");
assert(LHS.getValueType() == RHS.getValueType() &&
       "Expected both operands to be the same type");

// Get the upper and lower bits of the result.
Lo = Result.getValue(0);
Hi = Result.getValue(1);

// Try for a multiplication using a wider type.

if (Scale == VTSize)
  // Result is just the top half since we'd be shifting by the width of the
  // operand. Overflow impossible so this works for both UMULFIX and
  // UMULFIXSAT.
  return Hi;

// The result will need to be shifted right by the scale since both operands
// are scaled. The result is given to us in 2 halves, so we only want part of
// both in the result.

// Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
// widened multiplication) aren't all zeroes.
// Saturate to max if ((Hi >> Scale) != 0),
// which is the same as if (Hi > ((1 << Scale) - 1))

// Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
// widened multiplication) aren't all ones or all zeroes.
// Saturate to SatMin if wide product is negative, and SatMax if wide
// product is positive ...
// ... but only if we overflowed.
return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result);

// We handled Scale==0 above so all the bits to examine are in Hi.
// Saturate to max if ((Hi >> (Scale - 1)) > 0),
// which is the same as if (Hi > (1 << (Scale - 1)) - 1)
// Saturate to min if ((Hi >> (Scale - 1)) < -1),
// which is the same as if (Hi < (-1 << (Scale - 1)))
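// Illustrative sketch (hypothetical helper): the widened multiply and
// rescale at the heart of the expansion above, for 32-bit unsigned
// operands with Scale < 32.
static inline unsigned umulfix32(unsigned A, unsigned B, unsigned Scale) {
  // Both operands carry Scale fractional bits, so their full product
  // carries 2*Scale; shifting right by Scale restores the format. The
  // saturating variants additionally check the discarded high bits.
  unsigned long long Wide = (unsigned long long)A * B;
  return (unsigned)(Wide >> Scale);
}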
11148EVT VT =
LHS.getValueType();
11153// If there is enough room in the type to upscale the LHS or downscale the 11154// RHS before the division, we can perform it in this type without having to 11155// resize. For signed operations, the LHS headroom is the number of 11156// redundant sign bits, and for unsigned ones it is the number of zeroes. 11157// The headroom for the RHS is the number of trailing zeroes. 11162// For signed saturating operations, we need to be able to detect true integer 11163// division overflow; that is, when you have MIN / -EPS. However, this 11164// is undefined behavior and if we emit divisions that could take such 11165// values it may cause undesired behavior (arithmetic exceptions on x86, for 11167// Avoid this by requiring an extra bit so that we never get this case. 11168// FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale 11169// signed saturating division, we need to emit a whopping 32-bit division. 11170if (LHSLead + RHSTrail < Scale + (
unsigned)(Saturating &&
Signed))
11173unsigned LHSShift = std::min(LHSLead, Scale);
11174unsigned RHSShift = Scale - LHSShift;
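// Illustrative sketch (hypothetical helper) of the rescaling just computed:
// shift the dividend up by LHSShift and the divisor down by RHSShift, then
// divide. It assumes the headroom check above passed, i.e. the divisor
// really has RHSShift trailing zero bits and the shifted dividend fits.
static inline unsigned udivfix32(unsigned A, unsigned B, unsigned LHSShift,
                                 unsigned RHSShift) {
  return (A << LHSShift) / (B >> RHSShift);
}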
// At this point, we know that if we shift the LHS up by LHSShift and the
// RHS down by RHSShift, we can emit a regular division with a final scaling
// factor of Scale.

// For signed operations, if the resulting quotient is negative and the
// remainder is nonzero, subtract 1 from the quotient to round towards
// negative infinity.

// FIXME: Ideally we would always produce an SDIVREM here, but if the
// type isn't legal, SDIVREM cannot be expanded. There is no reason why
// we couldn't just form a libcall, but the type legalizer doesn't do it.

// If UADDO_CARRY/SUBO_CARRY is legal, use that instead.
SDValue NodeCarry =
    DAG.getNode(OpcCarry, dl, Node->getVTList(), {LHS, RHS, CarryIn});
EVT ResultType = Node->getValueType(1);

// Special case: uaddo X, 1 overflowed if X+1 is 0. This potentially reduces
// the live range of X. We assume comparing with 0 is cheap.
// The general case (X + C) < C is not necessarily beneficial. Although we
// reduce the live range of X, we may introduce the materialization of
// constant C.
SetCC = DAG.getSetCC(dl, SetCCType, Result,
                     DAG.getConstant(0, dl, Node->getValueType(0)),
                     ISD::SETEQ);

// Special case: uaddo X, -1 overflows if X != 0.
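// Illustrative sketch (hypothetical helper): the scalar checks behind the
// two special cases above and the generic unsigned-add overflow test.
static inline bool uaddo32(unsigned X, unsigned Y, unsigned &Sum) {
  Sum = X + Y;
  if (Y == 1)   return Sum == 0; // uaddo(X, 1): overflowed iff X+1 wraps to 0
  if (Y == ~0u) return X != 0;   // uaddo(X, -1): overflows iff X != 0
  return Sum < X;                // general case: the sum wrapped below X
}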
11286// If SADDSAT/SSUBSAT is legal, compare results to detect overflow. 11297// For an addition, the result should be less than one of the operands (LHS) 11298// if and only if the other operand (RHS) is negative, otherwise there will 11300// For a subtraction, the result should be less than one of the operands 11301// (LHS) if and only if the other operand (RHS) is (non-zero) positive, 11302// otherwise there will be overflow. 11308 DAG.
getNode(
ISD::XOR, dl, OType, ConditionRHS, ResultLowerThanLHS), dl,
11309 ResultType, ResultType);
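// Illustrative sketch (hypothetical helper): the XOR form built above. For
// an addition the sum is below LHS exactly when RHS is negative, so a
// mismatch between the two tests signals overflow.
static inline bool saddo32(int LHS, int RHS, int &Sum) {
  Sum = (int)((unsigned)LHS + (unsigned)RHS); // wrapping add
  bool ResultLowerThanLHS = Sum < LHS;
  bool ConditionRHS = RHS < 0; // for a subtraction this would be RHS > 0
  return ConditionRHS != ResultLowerThanLHS; // the XOR of the two conditions
}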
EVT VT = Node->getValueType(0);

// For power-of-two multiplications we can use a simpler shift expansion.
const APInt &C = RHSC->getAPIntValue();
// mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
if (C.isPowerOf2()) {
  // smulo(x, signed_min) is same as umulo(x, signed_min).
  bool UseArithShift = isSigned && !C.isMinSignedValue();
  Overflow = DAG.getSetCC(dl, SetCCVT,
                          DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
                                      dl, VT, Result, ShiftAmt),
                          LHS, ISD::SETNE);
}
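// Illustrative sketch (hypothetical helper): the power-of-two shortcut
// mulo(X, 1 << S) -> { X << S, (X << S) >> S != X } used above, in the
// unsigned case with S < 32.
static inline bool umulo_pow2_32(unsigned X, unsigned S, unsigned &Result) {
  Result = X << S;
  return (Result >> S) != X; // any bit shifted out means overflow
}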
static const unsigned Ops[2][3] =
    {{ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND},
     {ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND}};

forceExpandWideMUL(DAG, dl, isSigned, LHS, RHS, BottomHalf, TopHalf);
Result = BottomHalf;
Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
                        DAG.getConstant(0, dl, VT), ISD::SETNE);

// Truncate the result if SetCC returns a larger type than needed.
EVT RType = Node->getValueType(1);
assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
       "Unexpected result type for S/UMULO legalization");
EVT VT = Op.getValueType();
assert(!VT.isScalableVector() &&
       "Expanding reductions for scalable vectors is undefined.");

// Try to use a shuffle reduction for power of two vectors.
Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi, Node->getFlags());

for (unsigned i = 1; i < NumElts; i++)
  Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Node->getFlags());

// Result type may be wider than element type.
if (EltVT != Node->getValueType(0))
  Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
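// Illustrative sketch (hypothetical helper): the shuffle reduction above,
// expressed on a plain array. Each round combines the low and high halves
// until one element remains; NumElts must be a power of two (<= 64 here).
static unsigned reduceAdd(const unsigned *Elts, unsigned NumElts) {
  unsigned Buf[64];
  for (unsigned I = 0; I < NumElts; ++I)
    Buf[I] = Elts[I];
  while (NumElts > 1) {
    NumElts /= 2; // split into Lo and Hi halves
    for (unsigned I = 0; I < NumElts; ++I)
      Buf[I] = Buf[I] + Buf[I + NumElts]; // BaseOpcode applied to Lo, Hi
  }
  return Buf[0];
}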
SDValue AccOp = Node->getOperand(0);
SDValue VecOp = Node->getOperand(1);
assert(!VT.isScalableVector() &&
       "Expanding reductions for scalable vectors is undefined.");

for (unsigned i = 0; i < NumElts; i++)
  Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Flags);
EVT VT = Node->getValueType(0);
SDValue Dividend = Node->getOperand(0);
SDValue Divisor = Node->getOperand(1);
Result = DAG.getNode(DivRemOpc, dl, VTs, Dividend, Divisor).getValue(1);
SDValue Src = Node->getOperand(0);

// DstVT is the result type, while SatVT is the size to which we saturate.
EVT SrcVT = Src.getValueType();
EVT DstVT = Node->getValueType(0);
EVT SatVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
assert(SatWidth <= DstWidth &&
       "Expected saturation width smaller than result width");

// Determine minimum and maximum integer values and their corresponding
// floating-point values.
APInt MinInt, MaxInt;

// We cannot risk emitting FP_TO_XINT nodes with a source VT of [b]f16, as
// libcall emission cannot handle this. Large result types will fail.
if (SrcVT == MVT::f16 || SrcVT == MVT::bf16) {
  Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Src);
  SrcVT = Src.getValueType();
}

// If the integer bounds are exactly representable as floats and min/max are
// legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
// of comparisons and selects.
if (AreExactFloatBounds && MinMaxLegal) {
  // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
  // Clamp by MaxFloat from above. NaN cannot occur.
  // Convert clamped value to integer.
  SDValue FpToInt = DAG.getNode(Signed ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
                                dl, DstVT, Clamped);
  // In the unsigned case we're done, because we mapped NaN to MinFloat,
  // which will cast to zero.
  // Otherwise, select 0 if Src is NaN.
  return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, FpToInt);
}

// Result of direct conversion. The assumption here is that the operation is
// non-trapping and it's fine to apply it to an out-of-range value if we
// select it away later.

// If Src ULT MinFloat, select MinInt. In particular, this also selects
// MinInt if Src is NaN.

// If Src OGT MaxFloat, select MaxInt.

// In the unsigned case we are done, because we mapped NaN to MinInt, which
// is zero.

// Otherwise, select 0 if Src is NaN.
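// Illustrative sketch (hypothetical helper): the compare-and-select form
// described above for a saturating f64 -> i32 conversion, where both
// bounds are exactly representable.
static inline int fptosi_sat_i32(double Src) {
  const double MinFloat = -2147483648.0; // INT32_MIN, exact in double
  const double MaxFloat = 2147483647.0;  // INT32_MAX, exact in double
  // Direct conversion; out-of-range and NaN results are selected away
  // below, so guard the cast to keep the sketch free of UB.
  int FpToInt = (Src >= MinFloat && Src <= MaxFloat) ? (int)Src : 0;
  if (!(Src >= MinFloat)) // Src ULT MinFloat, also true for NaN
    FpToInt = -2147483647 - 1;
  if (Src > MaxFloat)     // Src OGT MaxFloat
    FpToInt = 2147483647;
  if (Src != Src)         // otherwise, select 0 if Src is NaN
    FpToInt = 0;
  return FpToInt;
}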
Op.getValueType();
11600// We are rounding binary64/binary128 -> binary32 -> bfloat16. This 11601// can induce double-rounding which may alter the results. We can 11602// correct for this using a trick explained in: Boldo, Sylvie, and 11603// Guillaume Melquiond. "When double rounding is odd." 17th IMACS 11604// World Congress. 2005. 11618 AbsWide = DAG.
getBitcast(OperandVT, ClearedSign);
11623// We can keep the narrow value as-is if narrowing was exact (no 11624// rounding error), the wide value was NaN (the narrow value is also 11625// NaN and should be preserved) or if we rounded to the odd value. 11633// The result is already odd so we don't need to do anything. 11638// We keep results which are exact, odd or NaN. 11641 KeepNarrow = DAG.
getNode(
ISD::OR, dl, WideSetCCVT, KeepNarrow, AlreadyOdd);
11642// We morally performed a round-down if AbsNarrow is smaller than 11646// If the narrow value is odd or exact, pick it. 11647// Otherwise, narrow is even and corresponds to either the rounded-up 11648// or rounded-down value. If narrow is the rounded-down value, we want 11649// the rounded-up value as it will be odd. 11650SDValue Adjust = DAG.
getSelect(dl, ResultIntVT, NarrowIsRd, One, NegativeOne);
11652Op = DAG.
getSelect(dl, ResultIntVT, KeepNarrow, NarrowBits, Adjusted);
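// Illustrative sketch (hypothetical helper, positive inputs only for
// brevity): the round-to-odd step above. Keep the narrowed value when the
// narrowing was exact, the input was NaN, or the result is already odd;
// otherwise nudge one ulp toward the wide value so the low bit becomes 1.
static inline float roundToOddF32(double Wide) {
  float Narrow = (float)Wide; // rounds to nearest-even
  unsigned Bits;
  std::memcpy(&Bits, &Narrow, sizeof(Bits)); // needs <cstring>
  bool IsNaN = Wide != Wide;
  bool Exact = !IsNaN && (double)Narrow == Wide;
  if (Exact || IsNaN || (Bits & 1))
    return Narrow;
  // We "morally" rounded down if the narrow value is below the wide one;
  // the odd neighbour is then one ulp up, otherwise one ulp down.
  Bits += ((double)Narrow < Wide) ? 1u : ~0u; // ~0u is -1 in wrapping math
  std::memcpy(&Narrow, &Bits, sizeof(Narrow));
  return Narrow;
}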
EVT VT = Node->getValueType(0);
if (Node->getConstantOperandVal(1) == 1) {

EVT OperandVT = Op.getValueType();

// We are rounding binary64/binary128 -> binary32 -> bfloat16. This
// can induce double-rounding which may alter the results. We can
// correct for this using a trick explained in: Boldo, Sylvie, and
// Guillaume Melquiond. "When double rounding is odd." 17th IMACS
// World Congress. 2005.
EVT I32 = F32.changeTypeToInteger();
Op = expandRoundInexactToOdd(F32, Op, dl, DAG);

// Conversions should set NaN's quiet bit. This also prevents NaNs from
// turning into infinities.

// Factor in the contribution of the low 16 bits.

// Don't round if we had a NaN, we don't want to turn 0x7fffffff into
// 0x80000000.

// Now that we have rounded, shift the bits into position.
EVT I16 = I32.isVector() ? I32.changeVectorElementType(MVT::i16) : MVT::i16;
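// Illustrative sketch (hypothetical helper): the integer rounding step
// above for f32 -> bfloat16. Add the round-to-nearest-even bias, shift the
// bits into position, and quiet NaNs instead of rounding them.
static inline unsigned short f32ToBF16(float F) {
  unsigned Bits;
  std::memcpy(&Bits, &F, sizeof(Bits)); // needs <cstring>
  if (F != F)
    return (unsigned short)((Bits >> 16) | 0x0040); // set NaN's quiet bit
  Bits += 0x7FFF + ((Bits >> 16) & 1); // ties go to the even result
  return (unsigned short)(Bits >> 16);
}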
assert(Node->getValueType(0).isScalableVector() &&
       "Fixed length vector types expected to use SHUFFLE_VECTOR!");

EVT VT = Node->getValueType(0);
SDValue V1 = Node->getOperand(0);
SDValue V2 = Node->getOperand(1);
int64_t Imm = cast<ConstantSDNode>(Node->getOperand(2))->getSExtValue();

// Expand through memory thusly:
//  Alloca CONCAT_VECTORS_TYPES(V1, V2) Ptr
//  Store V1, Ptr
//  Store V2, Ptr + sizeof(V1)
//  If (Imm < 0)
//    TrailingElts = -Imm
//    Ptr = Ptr + sizeof(V1) - (TrailingElts * sizeof(VT.Elt))
//  else
//    Ptr = Ptr + (Imm * sizeof(VT.Elt))
//  Res = Load Ptr

EVT PtrVT = StackPtr.getValueType();
auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();

// Store the lo part of CONCAT_VECTORS(V1, V2)
// Store the hi part of CONCAT_VECTORS(V1, V2)

// Load back the required element. getVectorElementPointer takes care of
// clamping the index if it's out-of-bounds.
StackPtr = getVectorElementPointer(DAG, StackPtr, VT, Node->getOperand(2));

// Load the spliced result
return DAG.getLoad(VT, DL, StoreV2, StackPtr,
                   MachinePointerInfo::getFixedStack(MF, FrameIndex));

// NOTE: TrailingElts must be clamped so as not to read outside of V1:V2.

// Calculate the start address of the spliced result.

// Load the spliced result
return DAG.getLoad(VT, DL, StoreV2, StackPtr2,
                   MachinePointerInfo::getFixedStack(MF, FrameIndex));
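// Illustrative sketch (hypothetical helper): the store/load splice above on
// a plain scratch buffer. Positive Imm selects a leading offset; negative
// Imm selects -Imm trailing elements of V1, clamped to stay inside V1:V2.
static void splice(const int *V1, const int *V2, int *Out, int NumElts,
                   int Imm) {
  int Buf[128]; // assumes NumElts <= 64
  for (int I = 0; I < NumElts; ++I)
    Buf[I] = V1[I]; // store the lo part
  for (int I = 0; I < NumElts; ++I)
    Buf[NumElts + I] = V2[I]; // store the hi part
  int Offset = Imm >= 0 ? Imm : NumElts + Imm;
  if (Offset < 0)
    Offset = 0; // clamp, like getVectorElementPointer
  if (Offset > NumElts)
    Offset = NumElts;
  for (int I = 0; I < NumElts; ++I)
    Out[I] = Buf[Offset + I]; // load the spliced result
}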
SDValue Vec = Node->getOperand(0);
SDValue Mask = Node->getOperand(1);
SDValue Passthru = Node->getOperand(2);
EVT MaskVT = Mask.getValueType();

// Needs to be handled by targets that have scalable vector types.
int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();

bool HasPassthru = !Passthru.isUndef();

// If we have a passthru vector, store it on the stack, overwrite the matching
// positions and then re-write the last element that was potentially
// overwritten even though mask[i] = false.
Chain = DAG.getStore(Chain, DL, Passthru, StackPtr, PtrInfo);

APInt PassthruSplatVal;
bool IsSplatPassthru =
    ISD::isConstantSplatVector(Passthru.getNode(), PassthruSplatVal);

if (IsSplatPassthru) {
  // As we do not know which position we wrote to last, we cannot simply
  // access that index from the passthru vector. So we first check if passthru
  // is a splat vector, to use any element ...
  LastWriteVal = DAG.getConstant(PassthruSplatVal, DL, ScalarVT);
} else if (HasPassthru) {
  // ... if it is not a splat vector, we need to get the passthru value at
  // position = popcount(mask) and re-load it from the stack before it is
  // overwritten in the loop below.
  SDValue LastElmtPtr =
      getVectorElementPointer(DAG, StackPtr, VecVT, Popcount);
  LastWriteVal = DAG.getLoad(ScalarVT, DL, Chain, LastElmtPtr, PtrInfo);
}

for (unsigned I = 0; I < NumElms; I++) {
  SDValue OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);
  Chain = DAG.getStore(Chain, DL, ValI, OutPtr, PtrInfo);

  // Get the mask value and add it to the current output position. This
  // either increments by 1 if MaskI is true or adds 0 otherwise.
  // Freeze in case we have poison/undef mask entries.

  if (HasPassthru && I == NumElms - 1) {
    OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);

    // Re-write the last ValI if all lanes were selected. Otherwise,
    // overwrite the last write with the passthru value.
    LastWriteVal = DAG.getSelect(DL, ScalarVT, AllLanesSelected, ValI,
                                 LastWriteVal);
    Chain = DAG.getStore(Chain, DL, LastWriteVal, OutPtr, PtrInfo);
  }
}

return DAG.getLoad(VecVT, DL, Chain, StackPtr, PtrInfo);
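// Illustrative sketch (hypothetical helper): the compress loop above on a
// plain buffer. Every lane is stored unconditionally at the current output
// position, which only advances for selected lanes; the slot right after
// the last selected lane is then restored from the passthru.
static void compress(const int *Vec, const bool *Mask, const int *Passthru,
                     int *Out, unsigned NumElts) {
  for (unsigned I = 0; I < NumElts; ++I)
    Out[I] = Passthru[I]; // store the passthru first
  unsigned OutPos = 0;
  for (unsigned I = 0; I < NumElts; ++I) {
    Out[OutPos] = Vec[I];      // unconditional store, as in the loop above
    OutPos += Mask[I] ? 1 : 0; // advance only for selected lanes
  }
  // Unless every lane was selected, position popcount(Mask) may have been
  // clobbered by an unselected store; re-write it with the passthru value.
  if (OutPos < NumElts)
    Out[OutPos] = Passthru[OutPos];
}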
                                       bool IsSignaling) const {
  MVT OpVT = LHS.getSimpleValueType();
  assert(!EVL == !Mask && "VP Mask and EVL must either both be set or unset");
  bool IsNonVP = !EVL;

  // Swapping operands didn't work. Try inverting the condition.
  bool NeedSwap = false;
  InvCC = getSetCCInverse(CCCode, OpVT);

  // If inverting the condition is not enough, try swapping operands.

  // Special case: expand i1 comparisons using logical operations.
  if (OpVT == MVT::i1) {
    switch (CCCode) {
    case ISD::SETGT:  // X >s Y  -->  X == 0 & Y == 1  -->  ~X & Y
    case ISD::SETULT: // X <u Y  -->  X == 0 & Y == 1  -->  ~X & Y

    case ISD::SETLT:  // X <s Y  -->  X == 1 & Y == 0  -->  ~Y & X
    case ISD::SETUGT: // X >u Y  -->  X == 1 & Y == 0  -->  ~Y & X

    case ISD::SETULE: // X <=u Y  -->  X == 0 | Y == 1  -->  ~X | Y
    case ISD::SETGE:  // X >=s Y  -->  X == 0 | Y == 1  -->  ~X | Y

    case ISD::SETUGE: // X >=u Y  -->  X == 1 | Y == 0  -->  ~Y | X
    case ISD::SETLE:  // X <=s Y  -->  X == 1 | Y == 0  -->  ~Y | X
    }
  }

  assert((isCondCodeLegal(ISD::SETOEQ, OpVT) ||
          isCondCodeLegal(ISD::SETUNE, OpVT)) &&
         "If SETUE is expanded, SETOEQ or SETUNE must be legal!");
  assert(isCondCodeLegal(ISD::SETOEQ, OpVT) &&
         "If SETO is expanded, SETOEQ must be legal!");

  // If the SETUO or SETO CC isn't legal, we might be able to use
  // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
  // of SETOGT/SETOLT to be legal, the other can be emulated by swapping
  // operands.
  NeedInvert = ((unsigned)CCCode & 0x8U);

  // If we are floating point, assign and break, otherwise fall through.
  // We can use the 4th bit to tell if we are the unordered
  // or ordered version of the opcode.

  // Fallthrough if we are unsigned integer.

  // If all combinations of inverting the condition and swapping operands
  // didn't work then we have no means to expand the condition.

  // If we aren't the ordered or unordered operation,
  // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
  LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);

  // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)

  // Transform the binary opcode to the VP equivalent.
  Opc = Opc == ISD::OR ? ISD::VP_OR : ISD::VP_AND;
  LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2, Mask, EVL);
EVT VT = Node->getValueType(0);

// Despite its documentation, GetSplitDestVTs will assert if VT cannot be
// split into two equal parts.

// Restrict expansion to cases where both parts can be concatenated.

unsigned Opcode = Node->getOpcode();

// Don't expand if the result is likely to be unrolled anyway.

for (const SDValue &V : Node->op_values()) {
unsigned const MachineRegisterInfo * MRI
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
block Block Frequency Analysis
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static bool isSigned(unsigned int Opcode)
static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, const APInt &Demanded)
Check to see if the specified operand of the specified instruction is a constant integer.
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
static bool isUndef(const MachineInstr &MI)
unsigned const TargetRegisterInfo * TRI
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
Function const char * Passes
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static bool lowerImmediateIfPossible(TargetLowering::ConstraintPair &P, SDValue Op, SelectionDAG *DAG, const TargetLowering &TLI)
If we have an immediate, see if we can lower it.
static SDValue expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG)
static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG, const APInt &UndefOp0, const APInt &UndefOp1)
Given a vector binary operation and known undefined elements for each input operand,...
static SDValue BuildExactUDIV(const TargetLowering &TLI, SDNode *N, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created)
Given an exact UDIV by a constant, create a multiplication with the multiplicative inverse of the con...
static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx, EVT VecVT, const SDLoc &dl, ElementCount SubEC)
static unsigned getConstraintPiority(TargetLowering::ConstraintType CT)
Return a number indicating our preference for chosing a type of constraint over another,...
static std::optional< bool > isFCmpEqualZero(FPClassTest Test, const fltSemantics &Semantics, const MachineFunction &MF)
Returns a true value if if this FPClassTest can be performed with an ordered fcmp to 0,...
static void turnVectorIntoSplatVector(MutableArrayRef< SDValue > Values, std::function< bool(SDValue)> Predicate, SDValue AlternativeReplacement=SDValue())
If all values in Values that don't match the predicate are same 'splat' value, then replace all value...
static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT)
static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created)
Given an exact SDIV by a constant, create a multiplication with the multiplicative inverse of the con...
static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT, SDValue N0, const APInt &C1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static SDValue combineShiftToAVG(SDValue Op, TargetLowering::TargetLoweringOpt &TLO, const TargetLowering &TLI, const APInt &DemandedBits, const APInt &DemandedElts, unsigned Depth)
This file describes how to lower LLVM code to machine code.
static int Lookup(ArrayRef< TableEntry > Table, unsigned Opcode)
static SDValue scalarizeVectorStore(StoreSDNode *Store, MVT StoreVT, SelectionDAG &DAG)
Scalarize a vector store, bitcasting to TargetVT to determine the scalar type.
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
static APFloat getSmallestNormalized(const fltSemantics &Sem, bool Negative=false)
Returns the smallest (by magnitude) normalized finite number in the given semantics.
APInt bitcastToAPInt() const
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Class for arbitrary precision integers.
APInt udiv(const APInt &RHS) const
Unsigned division operation.
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
static void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
void clearBit(unsigned BitPosition)
Set a given bit to 0.
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
APInt zext(unsigned width) const
Zero extend to a new width.
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
bool isMinSignedValue() const
Determine if this is the smallest signed value.
uint64_t getZExtValue() const
Get zero extended value.
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
unsigned getActiveBits() const
Compute the number of active bits in the value.
APInt trunc(unsigned width) const
Truncate to new width.
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
APInt urem(const APInt &RHS) const
Unsigned remainder operation.
void setSignBit()
Set the sign bit to 1.
unsigned getBitWidth() const
Return the number of bits in the APInt.
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
bool isNegative() const
Determine sign of this APInt.
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
void clearAllBits()
Set every bit to 0.
APInt reverseBits() const
void ashrInPlace(unsigned ShiftAmt)
Arithmetic right-shift this APInt by ShiftAmt in place.
void negate()
Negate this APInt in place.
unsigned countr_zero() const
Count the number of trailing zero bits.
unsigned countl_zero() const
The APInt version of std::countl_zero.
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
unsigned countLeadingZeros() const
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
unsigned logBase2() const
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
void setAllBits()
Set every bit to 1.
APInt multiplicativeInverse() const
bool isMaxSignedValue() const
Determine if this is the largest signed value.
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
APInt sext(unsigned width) const
Sign extend to a new width.
void setBits(unsigned loBit, unsigned hiBit)
Set the bits from loBit (inclusive) to hiBit (exclusive) to 1.
APInt shl(unsigned shiftAmt) const
Left-shift function.
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
bool isOne() const
Determine if this is a value of 1.
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
void clearHighBits(unsigned hiBits)
Set top hiBits bits to 0.
int64_t getSExtValue() const
Get sign extended value.
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
unsigned countr_one() const
Count the number of trailing one bits.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
void setBitVal(unsigned BitPosition, bool BitValue)
Set a given bit to a given value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool hasAttributes() const
Return true if the builder has IR-level attributes.
bool contains(Attribute::AttrKind A) const
Return true if the builder has the specified attribute.
AttrBuilder & removeAttribute(Attribute::AttrKind Val)
Remove an attribute from the builder.
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
A "pseudo-class" with methods for operating on BUILD_VECTORs.
ConstantSDNode * getConstantSplatNode(const APInt &DemandedElts, BitVector *UndefElements=nullptr) const
Returns the demanded splatted constant or null if this is not a constant splat.
CCValAssign - Represent assignment of one arg/retval to a location.
Register getLocReg() const
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
This class represents a function call, abstracting a target machine's calling convention.
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
ConstantFP - Floating Point Values [float, double].
This class represents a range of values.
const APInt & getAPIntValue() const
This is an important base class in LLVM.
This class represents an Operation in the Expression.
uint64_t getNumOperands() const
A parsed version of the target data layout string in and methods for querying it.
bool isLittleEndian() const
Layout endianness...
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
AttributeList getAttributes() const
Return the attribute list for this Function.
int64_t getOffset() const
const GlobalValue * getGlobal() const
Module * getParent()
Get the module that this global value is contained inside of...
std::vector< std::string > ConstraintCodeVector
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
This class is used to represent ISD::LOAD nodes.
Context object for machine code objects.
Base class for the full range of assembler expressions which are needed for parsing.
Wrapper class representing physical registers. Should be passed by value.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
bool isInteger() const
Return true if this is an integer or a vector integer type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
void setAdjustsStack(bool V)
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
DenormalMode getDenormalMode(const fltSemantics &FPType) const
Returns the denormal handling type for the default rounding mode of the function.
MCSymbol * getJTISymbol(unsigned JTI, MCContext &Ctx, bool isLinkerPrivate=false) const
getJTISymbol - Return the MCSymbol for the specified non-empty jump table.
Function & getFunction()
Return the LLVM function that this machine code represents.
@ EK_GPRel32BlockAddress
EK_GPRel32BlockAddress - Each entry is an address of block, encoded with a relocation as gp-relative,...
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
@ EK_BlockAddress
EK_BlockAddress - Each entry is a plain address of block, e.g.: .word LBB123.
@ EK_GPRel64BlockAddress
EK_GPRel64BlockAddress - Each entry is an address of block, encoded with a relocation as gp-relative,...
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const GlobalVariable * getNamedGlobal(StringRef Name) const
Return the global variable in the module with the specified name, of arbitrary type.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Class to represent pointers.
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
SDNodeFlags getFlags() const
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
void setFlags(SDNodeFlags NewFlags)
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
bool use_empty() const
Return true if there are no nodes using value ResNo of Node.
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
bool willNotOverflowAdd(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the addition of 2 nodes can never overflow.
Align getReducedAlign(EVT VT, bool UseABI)
In most cases this function returns the ABI alignment for a given type, except for illegal vector typ...
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT, unsigned Opcode)
Convert Op, which must be of integer type, to the integer type VT, by either any/sign/zero-extending ...
unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
bool isKnownNeverSNaN(SDValue Op, unsigned Depth=0) const
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
SDValue FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, const SDLoc &dl)
Constant fold a setcc to true or false.
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0, EVT EltVT=EVT())
Append the extracted elements from Start to Count out of the vector Op in Args.
SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
bool isConstantIntBuildVectorOrConstantInt(SDValue N, bool AllowOpaques=true) const
Test whether the given value is a constant int or similar node.
SDValue getJumpTableDebugInfo(int JTI, SDValue Chain, const SDLoc &DL)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getGLOBAL_OFFSET_TABLE(EVT VT)
Return a GLOBAL_OFFSET_TABLE node. This does not have a useful SDLoc.
SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
bool willNotOverflowSub(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the sub of 2 nodes can never overflow.
bool shouldOptForSize() const
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
bool doesNodeExist(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops)
Check if a node exists without modifying its flags.
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
SDValue getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, bool isTargetGA=false, unsigned TargetFlags=0)
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
bool isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly=false, unsigned Depth=0) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
bool isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth=0) const
Test if the given value is known to have exactly one bit set.
bool isKnownNeverZero(SDValue Op, unsigned Depth=0) const
Test whether the given SDValue is known to contain non-zero value(s).
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
bool isKnownNeverZeroFloat(SDValue Op) const
Test whether the given floating point SDValue is known to never be positive or negative zero.
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
MachineFunction & getMachineFunction() const
std::optional< uint64_t > getValidMaximumShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth=0) const
If a SHL/SRA/SRL node V has shift amounts that are all less than the element bit-width of the shift n...
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
std::optional< uint64_t > getValidShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth=0) const
If a SHL/SRA/SRL node V has a uniform shift amount that is less than the element bit-width of the shi...
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
SDValue getSetCCVP(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Mask, SDValue EVL)
Helper function to make it easier to build VP_SETCCs if you just have an ISD::CondCode instead of an ...
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
static void commuteMask(MutableArrayRef< int > Mask)
Change values in a shuffle permute mask assuming the two vector operands have swapped position.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class is used to represent ISD::STORE nodes.
StringRef - Represent a constant reference to a string, i.e.
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
constexpr size_t size() const
size - Get the string size.
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Class to represent struct types.
void setAttributes(const CallBase *Call, unsigned ArgIdx)
Set CallLoweringInfo attribute flags based on a call instruction and called function attributes.
This base class for TargetLowering contains the SelectionDAG-independent parts that can be used from ...
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
virtual bool isShuffleMaskLegal(ArrayRef< int >, EVT) const
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations,...
virtual bool shouldRemoveRedundantExtend(SDValue Op) const
Return true (the default) if it is profitable to remove a sext_inreg(x) where the sext is redundant,...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const
Get the CallingConv that should be used for the specified libcall.
virtual bool isLegalICmpImmediate(int64_t) const
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
virtual MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
virtual bool isSafeMemOpType(MVT) const
Returns true if it's safe to use load / store of the specified type to expand memcpy / memset inline.
const TargetMachine & getTargetMachine() const
virtual bool isCtpopFast(EVT VT) const
Return true if ctpop instruction is fast.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
bool isPaddedAtMostSignificantBitsWhenStored(EVT VT) const
Indicates if any padding is guaranteed to go at the most significant bits when storing the type to me...
virtual EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &) const
Returns the target specific optimal type for load and store operations as a result of memset,...
LegalizeAction getCondCodeAction(ISD::CondCode CC, MVT VT) const
Return how the condition code should be treated: either it is legal, needs to be expanded to some oth...
virtual bool isCommutativeBinOp(unsigned Opcode) const
Returns true if the opcode is a commutative binary operation.
virtual bool isFPImmLegal(const APFloat &, EVT, bool ForCodeSize=false) const
Returns true if the target can instruction select the specified FP immediate natively.
virtual MVT::SimpleValueType getCmpLibcallReturnType() const
Return the ValueType for comparison libcalls.
unsigned getBitWidthForCttzElements(Type *RetTy, ElementCount EC, bool ZeroIsPoison, const ConstantRange *VScaleRange) const
Return the minimum number of bits required to hold the maximum possible number of trailing zero vecto...
virtual bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const
Should we tranform the IR-optimal check for whether given truncation down into KeptBits would be trun...
bool isLegalRC(const TargetRegisterInfo &TRI, const TargetRegisterClass &RC) const
Return true if the value types that can be represented by the specified register class are all legal.
virtual bool shouldExpandCmpUsingSelects(EVT VT) const
Should we expand [US]CMP nodes using two selects and two compares, or by doing arithmetic on boolean ...
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const
Returns the type for the shift amount of a shift opcode.
virtual bool shouldExtendTypeInLibCall(EVT Type) const
Returns true if arguments should be extended in lib calls.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const
Return true if creating a shift of the type by the given amount is not profitable.
virtual bool isFPExtFree(EVT DestVT, EVT SrcVT) const
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const
Return the ValueType of the result of SETCC operations.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
BooleanContent getBooleanContents(bool isVec, bool isFloat) const
For targets without i1 registers, this gives the nature of the high-bits of boolean values held in ty...
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal for a comparison of the specified types on this ...
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT) const
Return true if it is profitable to reduce a load to a smaller type.
virtual unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const
Return the maximum number of "x & (x - 1)" operations that can be done instead of deferring to a cust...
virtual bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
BooleanContent
Enum that describes how the target represents true/false values.
@ ZeroOrOneBooleanContent
@ UndefinedBooleanContent
@ ZeroOrNegativeOneBooleanContent
virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const
Return true if integer divide is usually cheaper than a sequence of several shifts,...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
virtual bool hasAndNotCompare(SDValue Y) const
Return true if the target should transform: (X & Y) == Y —> (~X & Y) == 0 (X & Y) !...
virtual bool isNarrowingProfitable(SDNode *N, EVT SrcVT, EVT DestVT) const
Return true if it's profitable to narrow operations of type SrcVT to DestVT.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
virtual bool isCtlzFast() const
Return true if ctlz instruction is fast.
virtual bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT, bool IsSigned) const
Return true if it is more correct/profitable to use strict FP_TO_INT conversion operations - canonica...
NegatibleCost
Enum that specifies when a float negation is beneficial.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
ISD::CondCode getCmpLibcallCC(RTLIB::Libcall Call) const
Get the CondCode that's to be used to test the result of the comparison libcall against zero.
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
std::vector< ArgListEntry > ArgListTy
virtual EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
bool isCondCodeLegalOrCustom(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal or custom for a comparison of the specified type...
MVT getRegisterType(MVT VT) const
Return the type of registers that this ValueType will eventually require.
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
MulExpansionKind
Enum that specifies when a multiplication should be expanded.
static ISD::NodeType getExtendForContent(BooleanContent Content)
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions Ref: "Hacker's Delight" by Henry Warren 1...
virtual ConstraintWeight getMultipleConstraintMatchWeight(AsmOperandInfo &info, int maIndex) const
Examine constraint type and operand type and determine a weight value.
SDValue expandVPCTLZ(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTLZ/VP_CTLZ_ZERO_UNDEF nodes.
bool expandMULO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]MULO.
bool expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, SelectionDAG &DAG, MulExpansionKind Kind, SDValue LL=SDValue(), SDValue LH=SDValue(), SDValue RL=SDValue(), SDValue RH=SDValue()) const
Expand a MUL into two nodes.
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op: simplify it given that only the elements in DemandedEltMask are ever used, reporting which lanes are known undef or zero in KnownUndef and KnownZero.
virtual bool isUsedByReturnOnly(SDNode *, SDValue &) const
Return true if result of the specified node is used by a return node only.
virtual void computeKnownBitsForFrameIndex(int FIOp, KnownBits &Known, const MachineFunction &MF) const
Determine which of the bits of FrameIndex FIOp are known to be 0.
SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const
virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const
This method can be implemented by targets that want to expose additional information about sign bits ...
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
virtual unsigned computeNumSignBitsForTargetInstr(GISelKnownBits &Analysis, Register R, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
This method can be implemented by targets that want to expose additional information about sign bits ...
SDValue expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const
Expand VP_BSWAP nodes.
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
void forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl, bool Signed, const SDValue LHS, const SDValue RHS, SDValue &Lo, SDValue &Hi) const
Calculate full product of LHS and RHS either via a libcall or through brute force expansion of the mu...
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
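A minimal usage sketch, assuming a hypothetical target class MyTargetLowering and choosing RTLIB::ADD_F32 purely for illustration: soften an f32 add into a libcall and return the value half of the pair.

// Sketch: lower FADD(f32) via compiler-rt's __addsf3.
SDValue MyTargetLowering::softenFADD(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  SDValue Ops[] = {Op.getOperand(0), Op.getOperand(1)};
  TargetLowering::MakeLibCallOptions CallOptions;
  // first = return value, second = output chain (unused for an unchained op).
  std::pair<SDValue, SDValue> Tmp =
      makeLibCall(DAG, RTLIB::ADD_F32, MVT::f32, Ops, CallOptions, dl);
  return Tmp.first;
}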
SDValue expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const
Expand a VECREDUCE_SEQ_* into an explicit ordered calculation.
SDValue expandCTLZ(SDNode *N, SelectionDAG &DAG) const
Expand CTLZ/CTLZ_ZERO_UNDEF nodes.
SDValue expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const
Expand BITREVERSE nodes.
SDValue expandCTTZ(SDNode *N, SelectionDAG &DAG) const
Expand CTTZ/CTTZ_ZERO_UNDEF nodes.
virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const
Expands target specific indirect branch for the case of JumpTable expansion.
SDValue expandABD(SDNode *N, SelectionDAG &DAG) const
Expand ABDS/ABDU nodes.
virtual Align computeKnownAlignForTargetInstr(GISelKnownBits &Analysis, Register R, const MachineRegisterInfo &MRI, unsigned Depth=0) const
Determine the known alignment for the pointer value R.
std::vector< AsmOperandInfo > AsmOperandInfoVector
SDValue expandShlSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]SHLSAT.
SDValue expandIS_FPCLASS(EVT ResultVT, SDValue Op, FPClassTest Test, SDNodeFlags Flags, const SDLoc &DL, SelectionDAG &DAG) const
Expand check for floating point class.
SDValue expandFP_TO_INT_SAT(SDNode *N, SelectionDAG &DAG) const
Expand FP_TO_[US]INT_SAT into FP_TO_[US]INT and selects or min/max.
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "look through" ops that don't contri...
SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const
Expands an unaligned store to 2 half-size stores for integer values, and possibly more for vectors.
SDValue SimplifyMultipleUseDemandedVectorElts(SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
Helper wrapper around SimplifyMultipleUseDemandedBits, demanding all bits from only some vector eleme...
virtual bool findOptimalMemOpLowering(std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes) const
Determines the optimal series of memory ops to replace the memset / memcpy.
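A hedged override sketch (MyTargetLowering and the 32-byte v4i64 heuristic are hypothetical): seed MemOps with a preferred type for the sizes you care about and defer everything else to the generic cost model.

bool MyTargetLowering::findOptimalMemOpLowering(
    std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
    unsigned SrcAS, const AttributeList &FuncAttributes) const {
  // Hypothetical: cover an exactly-32-byte copy with a single 256-bit op.
  if (Op.size() == 32 && Limit >= 1) {
    MemOps.push_back(MVT::v4i64);
    return true;
  }
  return TargetLowering::findOptimalMemOpLowering(MemOps, Limit, Op, DstAS,
                                                  SrcAS, FuncAttributes);
}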
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
void expandSADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::S(ADD|SUB)O.
SDValue expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const
Expand VP_BITREVERSE nodes.
SDValue expandABS(SDNode *N, SelectionDAG &DAG, bool IsNegative=false) const
Expand ABS nodes.
SDValue expandVecReduce(SDNode *Node, SelectionDAG &DAG) const
Expand a VECREDUCE_* into an explicit calculation.
bool ShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const
Check to see if the specified operand of the specified instruction is a constant integer.
SDValue expandVPCTTZElements(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTTZ_ELTS/VP_CTTZ_ELTS_ZERO_UNDEF nodes.
SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::SDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual const char * getTargetNodeName(unsigned Opcode) const
This method returns the name of a target specific DAG node.
bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const
Expand float to UINT conversion.
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
virtual bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded vector elements, returning true on success...
bool expandREM(SDNode *Node, SDValue &Result, SelectionDAG &DAG) const
Expand an SREM or UREM using SDIV/UDIV or SDIVREM/UDIVREM, if legal.
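The usual call pattern, sketched with a hypothetical MyTargetLowering: try the expansion first and return the empty SDValue to fall back to generic legalization (typically a runtime libcall).

SDValue MyTargetLowering::lowerSREM(SDValue Op, SelectionDAG &DAG) const {
  SDValue Result;
  // Succeeds only if a suitable SDIV/UDIV or SDIVREM/UDIVREM is available.
  if (expandREM(Op.getNode(), Result, DAG))
    return Result;
  return SDValue(); // let the default legalizer handle it
}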
std::pair< SDValue, SDValue > expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Expands an unaligned load to 2 half-size loads for an integer, and possibly more for vectors.
SDValue expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *N, SelectionDAG &DAG) const
Expand fminimumnum/fmaximumnum into multiple comparisons with selects.
void forceExpandMultiply(SelectionDAG &DAG, const SDLoc &dl, bool Signed, SDValue &Lo, SDValue &Hi, SDValue LHS, SDValue RHS, SDValue HiLHS=SDValue(), SDValue HiRHS=SDValue()) const
Calculate the product of LHS and RHS at twice their width.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
SDValue expandVectorSplice(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::VECTOR_SPLICE.
virtual const char * LowerXConstraint(EVT ConstraintVT) const
Try to replace an X constraint, which matches anything, with another that has more specific requireme...
SDValue expandCTPOP(SDNode *N, SelectionDAG &DAG) const
Expand CTPOP nodes.
SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::UDIV node expressing a divide by constant, return a DAG expression to select that will ...
SDValue expandVectorNaryOpBySplitting(SDNode *Node, SelectionDAG &DAG) const
SDValue expandBSWAP(SDNode *N, SelectionDAG &DAG) const
Expand BSWAP nodes.
SDValue expandFMINIMUM_FMAXIMUM(SDNode *N, SelectionDAG &DAG) const
Expand fminimum/fmaximum into multiple comparisons with selects.
SDValue CTTZTableLookup(SDNode *N, SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op, unsigned NumBitsPerElt) const
Expand CTTZ via Table Lookup.
virtual bool isKnownNeverNaNForTargetNode(SDValue Op, const SelectionDAG &DAG, bool SNaN=false, unsigned Depth=0) const
If SNaN is false, returns true if Op is known never to be any NaN; if SNaN is true, returns true if Op is known never to be a signaling NaN.
bool expandDIVREMByConstant(SDNode *N, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, SDValue LL=SDValue(), SDValue LH=SDValue()) const
Attempt to expand an n-bit div/rem/divrem by constant using a n/2-bit urem by constant and other arit...
SDValue getVectorSubVecPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, EVT SubVecVT, SDValue Index) const
Get a pointer to a sub-vector of type SubVecVT at index Idx located in memory for a vector of type Ve...
virtual void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool isPositionIndependent() const
std::pair< StringRef, TargetLowering::ConstraintType > ConstraintPair
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
ConstraintGroup getConstraintPreferences(AsmOperandInfo &OpInfo) const
Given an OpInfo with list of constraints codes as strings, return a sorted Vector of pairs of constra...
bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const
Expand float to SINT conversion (e.g. f32 to i64).
virtual SDValue SimplifyMultipleUseDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth) const
More limited version of SimplifyDemandedBits that can be used to "look through" ops that don't contri...
virtual SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Glue, const SDLoc &DL, const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, MutableArrayRef< int > Mask, SelectionDAG &DAG) const
Tries to build a legal vector shuffle using the provided parameters or equivalent variations.
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
std::pair< SDValue, SDValue > scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Turn load of vector type into a load of the individual elements.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g. {edx}), return the register number and register class for it.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op: simplify it given that only the DemandedBits bits of its result are ever used, recording what is known about the result in Known.
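Typical use from inside PerformDAGCombine, sketched; the 8-bit demand is illustrative. TargetLoweringOpt queues the replacement and DAGCombinerInfo commits it.

// Assume only the low 8 bits of N's first operand feed this node.
SDValue Src = N->getOperand(0);
APInt Demanded = APInt::getLowBitsSet(Src.getScalarValueSizeInBits(), 8);
KnownBits Known;
TargetLowering::TargetLoweringOpt TLO(DCI.DAG, !DCI.isBeforeLegalize(),
                                      !DCI.isBeforeLegalizeOps());
if (SimplifyDemandedBits(Src, Demanded, Known, TLO)) {
  DCI.CommitTargetLoweringOpt(TLO); // apply the queued rewrite
  return SDValue(N, 0);
}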
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
TargetLowering(const TargetLowering &)=delete
bool isConstFalseVal(SDValue N) const
Return if the N is a constant or constant vector equal to the false value from getBooleanContents().
SDValue IncrementMemoryAddress(SDValue Addr, SDValue Mask, const SDLoc &DL, EVT DataVT, SelectionDAG &DAG, bool IsCompressedMemory) const
Increments memory address Addr according to the type of the value DataVT that should be stored.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const
Check whether a given call node is in tail position within its function.
virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL, const TargetRegisterInfo *TRI, const CallBase &Call) const
Split up the constraint string from the inline assembly value into the specific constraints and their...
virtual bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &UndefElts, const SelectionDAG &DAG, unsigned Depth=0) const
Return true if vector Op has the same value across all DemandedElts, indicating any elements which ma...
SDValue expandRoundInexactToOdd(EVT ResultVT, SDValue Op, const SDLoc &DL, SelectionDAG &DAG) const
Truncate Op to ResultVT using round-to-odd, so that a subsequent rounding cannot double-round.
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, const SDLoc &dl) const
Try to simplify a setcc built with the specified operands and cc.
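A sketch of re-running this fold from a target's PerformDAGCombine on an ISD::SETCC node:

if (N->getOpcode() == ISD::SETCC) {
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  if (SDValue V = SimplifySetCC(N->getValueType(0), N->getOperand(0),
                                N->getOperand(1), CC, /*foldBooleans=*/true,
                                DCI, SDLoc(N)))
    return V;
}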
SDValue expandFunnelShift(SDNode *N, SelectionDAG &DAG) const
Expand funnel shift.
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const
Return true if folding a constant offset with the given GlobalAddress is legal.
bool LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, SDValue Mask, SDValue EVL, bool &NeedInvert, const SDLoc &dl, SDValue &Chain, bool IsSignaling=false) const
Legalize a SETCC or VP_SETCC with given LHS and RHS and condition code CC on the current target.
bool isExtendedTrueVal(const ConstantSDNode *N, EVT VT, bool SExt) const
Return if N is a True value when extended to VT.
bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, const APInt &DemandedBits, TargetLoweringOpt &TLO) const
Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
bool isConstTrueVal(SDValue N) const
Return if the N is a constant or constant vector equal to the true value from getBooleanContents().
SDValue expandVPCTPOP(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTPOP nodes.
SDValue expandFixedPointDiv(unsigned Opcode, const SDLoc &dl, SDValue LHS, SDValue RHS, unsigned Scale, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]DIVFIX[SAT].
SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo, SDValue Op, SelectionDAG *DAG=nullptr) const
Determines the constraint code and constraint type to use for the specific AsmOperandInfo,...
virtual void CollectTargetIntrinsicOperands(const CallInst &I, SmallVectorImpl< SDValue > &Ops, SelectionDAG &DAG) const
SDValue expandVPCTTZ(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTTZ/VP_CTTZ_ZERO_UNDEF nodes.
SDValue expandVECTOR_COMPRESS(SDNode *Node, SelectionDAG &DAG) const
Expand a vector VECTOR_COMPRESS into a sequence of extract element, store temporarily,...
virtual const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const
This method returns the constant pool value that will be loaded by LD.
SDValue expandFP_ROUND(SDNode *Node, SelectionDAG &DAG) const
Expand round(fp) to fp conversion.
SDValue createSelectForFMINNUM_FMAXNUM(SDNode *Node, SelectionDAG &DAG) const
Try to convert the fminnum/fmaxnum to a compare/select sequence.
SDValue expandROT(SDNode *N, bool AllowVectorOps, SelectionDAG &DAG) const
Expand rotations.
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const
Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs.
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
virtual bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, unsigned Depth) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
SDValue expandCMP(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]CMP.
void expandShiftParts(SDNode *N, SDValue &Lo, SDValue &Hi, SelectionDAG &DAG) const
Expand shift-by-parts.
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
This method will be invoked for all target nodes and for any target-independent nodes that the target...
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
SDValue expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[U|S]MULFIX[SAT].
SDValue expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][MIN|MAX].
SDValue expandVectorFindLastActive(SDNode *N, SelectionDAG &DAG) const
Expand VECTOR_FIND_LAST_ACTIVE nodes.
virtual void computeKnownBitsForTargetInstr(GISelKnownBits &Analysis, Register R, KnownBits &Known, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
void expandUADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::U(ADD|SUB)O.
virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
virtual SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SREM lowering for power-of-2 denominators.
bool expandUINT_TO_FP(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const
Expand UINT to float conversion (e.g. i64 to f64).
bool expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl, SDValue LHS, SDValue RHS, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, MulExpansionKind Kind, SDValue LL=SDValue(), SDValue LH=SDValue(), SDValue RL=SDValue(), SDValue RH=SDValue()) const
Expand a MUL or [US]MUL_LOHI of n-bit values into two or four nodes, respectively,...
SDValue expandAVG(SDNode *N, SelectionDAG &DAG) const
Expand vector/scalar AVGCEILS/AVGCEILU/AVGFLOORS/AVGFLOORU nodes.
Primary interface to the complete machine description for the target machine.
bool isPositionIndependent() const
const Triple & getTargetTriple() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
iterator_range< regclass_iterator > regclasses() const
virtual StringRef getRegAsmName(MCRegister Reg) const
Return the assembly name for Reg.
bool isTypeLegalForClass(const TargetRegisterClass &RC, MVT T) const
Return true if the given TargetRegisterClass has the ValueType T.
bool isOSBinFormatCOFF() const
Tests whether the OS uses the COFF binary format.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
The instances of the Type class are immutable: once they are created, they are never changed.
const fltSemantics & getFltSemantics() const
bool isSingleValueType() const
Return true if the type is a valid type for a register in codegen.
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
bool isIntegerTy() const
True if this is an instance of IntegerType.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
const Value * stripPointerCastsAndAliases() const
Strip off pointer casts, all-zero GEPs, address space casts, and aliases.
StringRef getName() const
Return a constant reference to the value's name.
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
constexpr ScalarTy getFixedValue() const
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth, bool MatchAllBits=false)
Splat/Merge neighboring bits to widen/narrow the bitmask represented by A to NewBitWidth bits.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ Fast
Attempts to make calls as fast as possible (e.g.
@ C
The default llvm calling convention, compatible with C.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
@ FGETSIGN
INT = FGETSIGN(FP) - Return the sign bit of the specified floating point value as an integer 0/1 valu...
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
@ BSWAP
Byte Swap and Counting operators.
@ SMULFIX
RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on 2 integers with the same...
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
@ ADD
Simple integer binary arithmetic operators.
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ SMULFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
@ FADD
Simple binary floating point operators.
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
@ SDIVFIX
RESULT = [US]DIVFIX(LHS, RHS, SCALE) - Perform fixed point division on 2 integers with the same width...
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ SIGN_EXTEND
Conversion operators.
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ SSUBO
Same for subtraction.
@ BRIND
BRIND - Indirect branch.
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
@ SHL
Shift and rotation operations.
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
@ SMULO
Same for multiplication.
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
@ SDIVFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
@ VECTOR_SPLICE
VECTOR_SPLICE(VEC1, VEC2, IMM) - Returns a subvector of the same type as VEC1/VEC2 from CONCAT_VECTOR...
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that is same with FMINNUM_IEEE and FMAXNUM_IEEE besid...
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
NodeType getExtForLoadExtType(bool IsFP, LoadExtType)
bool matchUnaryPredicate(SDValue Op, std::function< bool(ConstantSDNode *)> Match, bool AllowUndefs=false)
Hook for matching ConstantSDNode predicate.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isTrueWhenEqual(CondCode Cond)
Return true if the specified condition returns true if the two operands to the condition are equal.
unsigned getUnorderedFlavor(CondCode Cond)
This function returns 0 if the condition is always false if an operand is a NaN, 1 if the condition i...
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
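For example, the two helpers compose like this (the i32 type is illustrative):

ISD::CondCode CC   = ISD::SETULT;
ISD::CondCode Inv  = ISD::getSetCCInverse(CC, MVT::i32);  // SETUGE: !(x <u y)
ISD::CondCode Swap = ISD::getSetCCSwappedOperands(CC);    // SETUGT: y >u x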
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates. Return true if N is a BUILD_VECTOR or SPLAT_VECTOR whose elements are all the same constant (or undef), setting SplatValue to that constant.
bool matchBinaryPredicate(SDValue LHS, SDValue RHS, std::function< bool(ConstantSDNode *, ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTypeMismatch=false)
Attempt to match a binary predicate against a pair of scalar/splat constants or every element of a pa...
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
FPClassTest invertFPClassTestIfSimpler(FPClassTest Test, bool UseFCmp)
Evaluates if the specified FP class test is better performed as the inverse (i.e. fewer instructions would be required to lower it).
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
bool getShuffleDemandedElts(int SrcWidth, ArrayRef< int > Mask, const APInt &DemandedElts, APInt &DemandedLHS, APInt &DemandedRHS, bool AllowUndefElts=false)
Transform a shuffle mask's output demanded element mask into demanded element masks for the 2 operand...
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
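These two helpers usually pair up when strength-reducing a multiply by a constant into a shift:

uint32_t C = 64;
if (llvm::isPowerOf2_32(C)) {
  unsigned ShAmt = llvm::Log2_32(C); // 6, since x * 64 == x << 6
  // ... emit ISD::SHL by ShAmt instead of ISD::MUL ...
}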
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
auto find_if_not(R &&Range, UnaryPredicate P)
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
ConstantRange getVScaleRange(const Function *F, unsigned BitWidth)
Determine the possible constant range of vscale with the given bit width, based on the vscale_range f...
bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
DWARFExpression::Operation Op
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isConstFalseVal(const TargetLowering &TLI, int64_t Val, bool IsVector, bool IsFP)
constexpr unsigned BitWidth
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
APFloat neg(APFloat X)
Returns the negated value of the argument.
unsigned Log2(Align A)
Returns the log2 of the alignment.
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
static constexpr roundingMode rmNearestTiesToEven
static constexpr roundingMode rmTowardZero
opStatus
IEEE-754R 7: Default exception handling.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Represent subnormal handling kind for floating point instruction inputs and outputs.
DenormalModeKind Input
Denormal treatment kind for floating point instruction inputs in the default floating-point environme...
@ PreserveSign
The sign of a flushed-to-zero number is preserved in the sign of 0.
@ PositiveZero
Denormals are flushed to positive zero.
@ IEEE
IEEE-754 denormal numbers preserved.
constexpr bool inputsAreZero() const
Return true if input denormals must be implicitly treated as 0.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
ElementCount getVectorElementCount() const
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
bool isByteSized() const
Return true if the bit size is a multiple of 8.
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
uint64_t getScalarSizeInBits() const
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
bool isFixedLengthVector() const
bool isVector() const
Return true if this is a vector value type.
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
bool isInteger() const
Return true if this is an integer or a vector integer type.
ConstraintPrefix Type
Type - The basic type of the constraint: input/output/clobber/label.
int MatchingInput
MatchingInput - If this is not -1, this is an output constraint where an input constraint is required...
ConstraintCodeVector Codes
Code - The constraint code, either the register name (in braces) or the constraint letter/number.
SubConstraintInfoVector multipleAlternatives
multipleAlternatives - If there are multiple alternative constraints, this array will contain them.
bool isIndirect
isIndirect - True if this operand is an indirect operand.
bool hasMatchingInput() const
hasMatchingInput - Return true if this is an output constraint that has a matching input constraint.
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
KnownBits anyextOrTrunc(unsigned BitWidth) const
Return known bits for an "any" extension or truncation of the value we're tracking.
unsigned countMinSignBits() const
Returns the number of times the sign bit is replicated into the other bits.
static KnownBits smax(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for smax(LHS, RHS).
bool isNonNegative() const
Returns true if this value is known to be non-negative.
bool isZero() const
Returns true if value is all zero.
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
bool isUnknown() const
Returns true if we don't know any bits.
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
static std::optional< bool > sge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGE result.
unsigned countMaxPopulation() const
Returns the maximum number of bits that could be one.
KnownBits concat(const KnownBits &Lo) const
Concatenate the bits from Lo onto the bottom of *this.
unsigned getBitWidth() const
Get the bit width of this value.
static KnownBits umax(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for umax(LHS, RHS).
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
void resetAll()
Resets the known state of all bits.
KnownBits unionWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for either this or RHS or both.
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
static KnownBits smin(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for smin(LHS, RHS).
static std::optional< bool > ugt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGT result.
static std::optional< bool > slt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLT result.
static KnownBits computeForAddSub(bool Add, bool NSW, bool NUW, const KnownBits &LHS, const KnownBits &RHS)
Compute known bits resulting from adding LHS and RHS.
static std::optional< bool > ult(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULT result.
static std::optional< bool > ule(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULE result.
bool isNegative() const
Returns true if this value is known to be negative.
static KnownBits mul(const KnownBits &LHS, const KnownBits &RHS, bool NoUndefSelfMultiply=false)
Compute known bits resulting from multiplying LHS and RHS.
KnownBits anyext(unsigned BitWidth) const
Return known bits for an "any" extension of the value we're tracking, where we don't know anything ab...
static std::optional< bool > sle(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLE result.
static std::optional< bool > sgt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGT result.
unsigned countMinPopulation() const
Returns the number of bits known to be one.
static std::optional< bool > uge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGE result.
static KnownBits umin(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for umin(LHS, RHS).
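A small self-contained sketch of the comparison helpers above: partial bit knowledge can already decide an unsigned compare.

#include "llvm/Support/KnownBits.h"
#include <optional>
using namespace llvm;

// A value with its top bit known zero (< 128) is always ULT a value with its
// top bit known one (>= 128), whatever the remaining bits turn out to be.
bool knownUltExample() {
  KnownBits Small(8), Big(8);
  Small.Zero.setBit(7);
  Big.One.setBit(7);
  std::optional<bool> R = KnownBits::ult(Small, Big);
  return R.has_value() && *R; // true: the compare is fully decided
}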
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
static MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasNoUnsignedWrap() const
bool hasNoSignedWrap() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
Magic data for optimising signed division by a constant.
unsigned ShiftAmount
shift amount
static SignedDivisionByConstantInfo get(const APInt &D)
Calculate the magic numbers required to implement a signed integer division by a constant as a sequen...
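For instance (the divisor 7 is illustrative), BuildSDIV consumes this data roughly as a multiply-high followed by shift/fixup, per "Hacker's Delight":

SignedDivisionByConstantInfo Magics =
    SignedDivisionByConstantInfo::get(APInt(32, 7));
// quotient ~ (mulhs(x, Magics.Magic) + fixups) >> Magics.ShiftAmount
APInt Multiplier = Magics.Magic;
unsigned PostShift = Magics.ShiftAmount;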
This contains information for each constraint that we are lowering.
MVT ConstraintVT
The ValueType for the operand value.
TargetLowering::ConstraintType ConstraintType
Information about the constraint code, e.g.
std::string ConstraintCode
This contains the actual string for the code, like "m".
Value * CallOperandVal
If this is the result output operand or a clobber, this is null, otherwise it is the incoming operand...
unsigned getMatchedOperand() const
If this is an input matching constraint, this method returns the output operand it matches.
bool isMatchingInputConstraint() const
Return true of this is an input operand that is a matching constraint like "4".
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setIsPostTypeLegalization(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
CallLoweringInfo & setNoReturn(bool Value=true)
CallLoweringInfo & setChain(SDValue InChain)
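The setters chain fluently; a sketch of the setup LowerCallTo expects, assuming Args, Callee, RetTy, Chain, ShouldSignExt and dl were built beforehand:

TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl)
    .setChain(Chain)
    .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args))
    .setSExtResult(ShouldSignExt)
    .setZExtResult(!ShouldSignExt);
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);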
bool isBeforeLegalizeOps() const
void AddToWorklist(SDNode *N)
bool isCalledByLegalizer() const
bool isBeforeLegalize() const
void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setIsPostTypeLegalization(bool Value=true)
ArrayRef< EVT > OpsVTBeforeSoften
bool IsPostTypeLegalization
MakeLibCallOptions & setIsSigned(bool Value=true)
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT, bool Value=true)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
bool CombineTo(SDValue O, SDValue N)
bool LegalOperations() const
Magic data for optimising unsigned division by a constant.
unsigned PreShift
pre-shift amount
static UnsignedDivisionByConstantInfo get(const APInt &D, unsigned LeadingZeros=0, bool AllowEvenDivisorOptimization=true)
Calculate the magic numbers required to implement an unsigned integer division by a constant as a seq...
unsigned PostShift
post-shift amount
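Mirroring the signed variant above, a sketch for unsigned division by 10 (the constant is illustrative; IsAdd is a member not shown in this listing):

UnsignedDivisionByConstantInfo Magics =
    UnsignedDivisionByConstantInfo::get(APInt(32, 10));
// BuildUDIV emits roughly:
//   q = ((x >> Magics.PreShift) mulhu Magics.Magic) >> Magics.PostShift
// with an extra add-based fixup when Magics.IsAdd is set.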