//===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that RISC-V uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "llvm/IR/IntrinsicsRISCV.h"

#define DEBUG_TYPE "riscv-lower"

    cl::desc("Give the maximum size (in number of nodes) of the web of "
             "instructions that we will consider for VW expansion"),

    cl::desc("Allow the formation of VW_W operations (e.g., "
             "VWADD_W) with splat constants"),

    cl::desc("Set the minimum number of repetitions of a divisor to allow "
             "transformation to multiplications by the reciprocal"),

    cl::desc("Give the maximum number of instructions that we will "
             "use for creating a floating-point immediate value"),
      !Subtarget.hasStdExtF()) {
    errs() << "Hard-float 'f' ABI can't be used for a target that "
               "doesn't support the F instruction set extension (ignoring "

      !Subtarget.hasStdExtD()) {
    errs() << "Hard-float 'd' ABI can't be used for a target that "
               "doesn't support the D instruction set extension (ignoring "

  // Set up the register classes.

  if (Subtarget.hasStdExtZfhmin())
  if (Subtarget.hasStdExtZfbfmin())
  if (Subtarget.hasStdExtF())
  if (Subtarget.hasStdExtD())
  if (Subtarget.hasStdExtZhinxmin())
  if (Subtarget.hasStdExtZfinx())
  if (Subtarget.hasStdExtZdinx()) {
      MVT::nxv1i1,  MVT::nxv2i1,  MVT::nxv4i1,  MVT::nxv8i1,
      MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};

      MVT::nxv1i8,  MVT::nxv2i8,   MVT::nxv4i8,   MVT::nxv8i8,  MVT::nxv16i8,
      MVT::nxv32i8, MVT::nxv64i8,  MVT::nxv1i16,  MVT::nxv2i16, MVT::nxv4i16,
      MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
      MVT::nxv4i32, MVT::nxv8i32,  MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
      MVT::nxv4i64, MVT::nxv8i64};

      MVT::nxv1f16, MVT::nxv2f16,  MVT::nxv4f16,
      MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};

      MVT::nxv1bf16, MVT::nxv2bf16,  MVT::nxv4bf16,
      MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};

      MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};

      MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};

      MVT::riscv_nxv1i8x2,  MVT::riscv_nxv1i8x3,  MVT::riscv_nxv1i8x4,
      MVT::riscv_nxv1i8x5,  MVT::riscv_nxv1i8x6,  MVT::riscv_nxv1i8x7,
      MVT::riscv_nxv1i8x8,  MVT::riscv_nxv2i8x2,  MVT::riscv_nxv2i8x3,
      MVT::riscv_nxv2i8x4,  MVT::riscv_nxv2i8x5,  MVT::riscv_nxv2i8x6,
      MVT::riscv_nxv2i8x7,  MVT::riscv_nxv2i8x8,  MVT::riscv_nxv4i8x2,
      MVT::riscv_nxv4i8x3,  MVT::riscv_nxv4i8x4,  MVT::riscv_nxv4i8x5,
      MVT::riscv_nxv4i8x6,  MVT::riscv_nxv4i8x7,  MVT::riscv_nxv4i8x8,
      MVT::riscv_nxv8i8x2,  MVT::riscv_nxv8i8x3,  MVT::riscv_nxv8i8x4,
      MVT::riscv_nxv8i8x5,  MVT::riscv_nxv8i8x6,  MVT::riscv_nxv8i8x7,
      MVT::riscv_nxv8i8x8,  MVT::riscv_nxv16i8x2, MVT::riscv_nxv16i8x3,
      MVT::riscv_nxv16i8x4, MVT::riscv_nxv32i8x2};
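      // Note (added for clarity; not in the original source): each
      // MVT::riscv_nxv<N>i8x<NF> entry above is assumed here to back the
      // "riscv.vector.tuple" target extension type used later in this file,
      // i.e. a tuple of NF vector register groups as produced/consumed by
      // segment loads and stores; for example, MVT::riscv_nxv8i8x4 would be a
      // 4-field tuple whose per-field storage is nxv8i8 worth of bytes.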
  auto addRegClassForRVV = [this](MVT VT) {
    // Disable the smallest fractional LMUL types if ELEN is less than
    // RVVBitsPerBlock.
    if (VT.getVectorMinNumElements() < MinElts)

    unsigned Size = VT.getSizeInBits().getKnownMinValue();
      RC = &RISCV::VRRegClass;
      RC = &RISCV::VRM2RegClass;
      RC = &RISCV::VRM4RegClass;
      RC = &RISCV::VRM8RegClass;
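    // Illustrative note (added; not in the original source): Size above is the
    // type's known-minimum width in bits, so assuming RISCV::RVVBitsPerBlock
    // is 64, a type such as nxv2i32 (64 bits minimum) selects the LMUL=1 class
    // VR, nxv4i32 selects VRM2, nxv8i32 selects VRM4, and nxv16i32 selects
    // VRM8.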
    for (MVT VT : BoolVecVTs)
      addRegClassForRVV(VT);
    for (MVT VT : IntVecVTs) {
      if (VT.getVectorElementType() == MVT::i64 &&
      addRegClassForRVV(VT);

    for (MVT VT : F16VecVTs)
      addRegClassForRVV(VT);
    for (MVT VT : BF16VecVTs)
      addRegClassForRVV(VT);
    for (MVT VT : F32VecVTs)
      addRegClassForRVV(VT);
    for (MVT VT : F64VecVTs)
      addRegClassForRVV(VT);

  auto addRegClassForFixedVectors = [this](MVT VT) {

    if (useRVVForFixedLengthVectorVT(VT))
      addRegClassForFixedVectors(VT);
    if (useRVVForFixedLengthVectorVT(VT))
      addRegClassForFixedVectors(VT);
  // Compute derived properties from the register classes.

  // DAGCombiner can call isLoadExtLegal for types that aren't legal.

  // TODO: add all necessary setOperationAction calls.

  if (!(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {

  if (!Subtarget.hasVendorXTHeadBb())

  if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb() &&
      !(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()))

  if (!Subtarget.hasStdExtZbb())

  if (!Subtarget.hasStdExtZmmul()) {
  } else if (Subtarget.is64Bit()) {

  if (!Subtarget.hasStdExtM()) {
  } else if (Subtarget.is64Bit()) {
                       {MVT::i8, MVT::i16, MVT::i32}, Custom);

  if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
  } else if (Subtarget.hasVendorXTHeadBb()) {
  } else if (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit()) {

  // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
  // pattern match it directly in isel.
      (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
       Subtarget.hasVendorXTHeadBb())

  if (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit()) {
  // Zbkb can use rev8+brev8 to implement bitreverse.

  if (Subtarget.hasStdExtZbb() ||
      (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {

  if (Subtarget.hasStdExtZbb() ||
      (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit())) {

  if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
      (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit())) {
    // We need the custom lowering to make sure that the resulting sequence
    // for the 32bit case is efficient on 64bit targets.

  if (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()) {
  } else if (Subtarget.hasShortForwardBranchOpt()) {
    // We can use PseudoCCSUB to implement ABS.
  } else if (Subtarget.is64Bit()) {

  else if (!Subtarget.hasVendorXTHeadCondMov())
  static const unsigned FPLegalNodeTypes[] = {

  static const unsigned FPOpToExpand[] = {

  static const unsigned FPRndMode[] = {

  static const unsigned ZfhminZfbfminPromoteOps[] = {

  if (Subtarget.hasStdExtZfbfmin()) {

  if (Subtarget.hasStdExtZfa())

                       Subtarget.hasStdExtZfh() && Subtarget.hasStdExtZfa() ? Legal : Promote);

  // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
  // complete support for all operations in LegalizeDAG.

  // We need to custom promote this.

  if (Subtarget.hasStdExtZfa()) {

  if (Subtarget.hasStdExtZfa()) {

  // f16/bf16 require custom handling.

  // TODO: On M-mode only targets, the cycle[h]/time[h] CSR may not be present.
  // Unfortunately this can't be determined just from the ISA naming string.

  if (Subtarget.hasStdExtZicbop()) {

  if (Subtarget.hasStdExtA()) {
    if (Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas())
  } else if (Subtarget.hasForcedAtomics()) {

  // Custom lowering of llvm.clear_cache.

  // RVV intrinsics may have illegal operands.
  // We also need to custom legalize vmv.x.s.
                     {MVT::i8, MVT::i16}, Custom);
  static const unsigned IntegerVPOps[] = {
      ISD::VP_ADD,         ISD::VP_SUB,         ISD::VP_MUL,
      ISD::VP_SDIV,        ISD::VP_UDIV,        ISD::VP_SREM,
      ISD::VP_UREM,        ISD::VP_AND,         ISD::VP_OR,
      ISD::VP_XOR,         ISD::VP_SRA,         ISD::VP_SRL,
      ISD::VP_SHL,         ISD::VP_REDUCE_ADD,  ISD::VP_REDUCE_AND,
      ISD::VP_REDUCE_OR,   ISD::VP_REDUCE_XOR,  ISD::VP_REDUCE_SMAX,
      ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
      ISD::VP_MERGE,       ISD::VP_SELECT,      ISD::VP_FP_TO_SINT,
      ISD::VP_FP_TO_UINT,  ISD::VP_SETCC,       ISD::VP_SIGN_EXTEND,
      ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE,    ISD::VP_SMIN,
      ISD::VP_SMAX,        ISD::VP_UMIN,        ISD::VP_UMAX,
      ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE,
      ISD::VP_SADDSAT,     ISD::VP_UADDSAT,     ISD::VP_SSUBSAT,
      ISD::VP_USUBSAT,     ISD::VP_CTTZ_ELTS,   ISD::VP_CTTZ_ELTS_ZERO_UNDEF,
      ISD::EXPERIMENTAL_VP_SPLAT};

  static const unsigned FloatingPointVPOps[] = {
      ISD::VP_FADD,        ISD::VP_FSUB,        ISD::VP_FMUL,
      ISD::VP_FDIV,        ISD::VP_FNEG,        ISD::VP_FABS,
      ISD::VP_FMA,         ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
      ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
      ISD::VP_SELECT,      ISD::VP_SINT_TO_FP,  ISD::VP_UINT_TO_FP,
      ISD::VP_SETCC,       ISD::VP_FP_ROUND,    ISD::VP_FP_EXTEND,
      ISD::VP_SQRT,        ISD::VP_FMINNUM,     ISD::VP_FMAXNUM,
      ISD::VP_FCEIL,       ISD::VP_FFLOOR,      ISD::VP_FROUND,
      ISD::VP_FROUNDEVEN,  ISD::VP_FCOPYSIGN,   ISD::VP_FROUNDTOZERO,
      ISD::VP_FRINT,       ISD::VP_FNEARBYINT,  ISD::VP_IS_FPCLASS,
      ISD::VP_FMINIMUM,    ISD::VP_FMAXIMUM,    ISD::VP_LRINT,
      ISD::VP_LLRINT,      ISD::EXPERIMENTAL_VP_REVERSE,
      ISD::EXPERIMENTAL_VP_SPLICE, ISD::VP_REDUCE_FMINIMUM,
      ISD::VP_REDUCE_FMAXIMUM, ISD::EXPERIMENTAL_VP_SPLAT};

  static const unsigned IntegerVecReduceOps[] = {

  static const unsigned FloatingPointVecReduceOps[] = {

  static const unsigned FloatingPointLibCallOps[] = {

  // We must custom-lower certain vXi64 operations on RV32 due to the vector
  // element type being illegal.
                        ISD::VP_REDUCE_OR,   ISD::VP_REDUCE_XOR,
                        ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
                        ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
    for (MVT VT : BoolVecVTs) {

      // Mask VTs are custom-expanded into a series of standard nodes

                          {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,

      // RVV has native int->float & float->int conversions where the
      // element type sizes are within one power-of-two of each other. Any
      // wider distances between type sizes have to be lowered as sequences
      // which progressively narrow the gap in stages.

      // Expand all extending loads to types larger than this, and truncating
      // stores from types larger than this.

                          ISD::VP_TRUNCATE, ISD::VP_SETCC},

    for (MVT VT : IntVecVTs) {

      // Vectors implement MULHS/MULHU.

      // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
      if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())

      // Custom-lower extensions and truncations from/to mask types.

      // RVV has native int->float & float->int conversions where the
      // element type sizes are within one power-of-two of each other. Any
      // wider distances between type sizes have to be lowered as sequences
      // which progressively narrow the gap in stages.

      // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
      // nodes which truncate by one power of two at a time.

      // Custom-lower insert/extract operations to simplify patterns.

      // Custom-lower reduction operations to set up the corresponding custom

                          {ISD::VP_LOAD, ISD::VP_STORE,
                           ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
                           ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
                           ISD::VP_SCATTER},

      if (Subtarget.hasStdExtZvkb()) {

      if (Subtarget.hasStdExtZvbb()) {
                            ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
                            ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},

        // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the
                            ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},

    for (MVT VT : VecTupleVTs) {

  // Expand various CCs to best match the RVV ISA, which natively supports UNE
  // but no other unordered comparisons, and supports all ordered comparisons
  // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
  // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
  // and we pattern-match those back to the "original", swapping operands once
  // more. This way we catch both operations and both "vf" and "fv" forms with

  // TODO: support more ops.
  static const unsigned ZvfhminZvfbfminPromoteOps[] = {

  // TODO: support more vp ops.
  static const unsigned ZvfhminZvfbfminPromoteVPOps[] = {
      ISD::VP_FROUNDTOZERO,
      ISD::VP_REDUCE_FMINIMUM,
      ISD::VP_REDUCE_FMAXIMUM};
  // Sets common operation actions on RVV floating-point vector types.
  const auto SetCommonVFPActions = [&](MVT VT) {
    // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
    // sizes are within one power-of-two of each other. Therefore conversions
    // between vXf16 and vXf64 must be lowered as sequences which convert via

    // Custom-lower insert/extract operations to simplify patterns.

    // Expand various condition codes (explained above).

    // Expand FP operations that need libcalls.

                        {ISD::VP_LOAD, ISD::VP_STORE,
                         ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
                         ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
                         ISD::VP_SCATTER},

  // Sets common extload/truncstore actions on RVV floating-point vector
  const auto SetCommonVFPExtLoadTruncStoreActions =
        for (auto SmallVT : SmallerVTs) {

  // Sets common actions for f16 and bf16 for when there's only
  // zvfhmin/zvfbfmin and we need to promote to f32 for most operations.
  const auto SetCommonPromoteToF32Actions = [&](MVT VT) {

                        ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
                        ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,

    // Expand FP operations that need libcalls.

    // Custom split nxv32[b]f16 since nxv32[b]f32 is not legal.

    for (MVT VT : F16VecVTs) {
      SetCommonVFPActions(VT);
    for (MVT VT : F16VecVTs) {
      SetCommonPromoteToF32Actions(VT);
    for (MVT VT : BF16VecVTs) {
      SetCommonPromoteToF32Actions(VT);
    for (MVT VT : F32VecVTs) {
      SetCommonVFPActions(VT);
      SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
      SetCommonVFPExtLoadTruncStoreActions(VT, BF16VecVTs);
    for (MVT VT : F64VecVTs) {
      SetCommonVFPActions(VT);
      SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
      SetCommonVFPExtLoadTruncStoreActions(VT, BF16VecVTs);
      SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
      if (!useRVVForFixedLengthVectorVT(VT))

      // By default everything must be expanded.

      // Custom lower fixed vector undefs to scalable vector undefs to avoid
      // expansion to a build_vector of 0s.

      // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.

                          {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,

      // Operations below are different between masks and other vectors.

                          ISD::VP_SETCC, ISD::VP_TRUNCATE},

      // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
      // it before type legalization for i64 vectors on RV32. It will then be
      // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
      // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
      // improvements first.

                          ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
                          ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,

      // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.

      // Custom-lower reduction operations to set up the corresponding custom

      if (Subtarget.hasStdExtZvkb())

      if (Subtarget.hasStdExtZvbb()) {

        // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the

      // There are no extending loads or truncating stores.

      if (!useRVVForFixedLengthVectorVT(VT))

      // By default everything must be expanded.

      // Custom lower fixed vector undefs to scalable vector undefs to avoid
      // expansion to a build_vector of 0s.

                          ISD::VP_SCATTER, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
                          ISD::EXPERIMENTAL_VP_STRIDED_STORE},

      if (Subtarget.hasStdExtZfhmin()) {

        // We need to custom legalize f16 build vectors if Zfhmin isn't

        // Don't promote f16 vector operations to f32 if f32 vector type is
        // TODO: could split the f16 vector into two vectors and do promotion.

      if (Subtarget.hasStdExtZfbfmin()) {

        // We need to custom legalize bf16 build vectors if Zfbfmin isn't

        // Don't promote f16 vector operations to f32 if f32 vector type is
        // TODO: could split the f16 vector into two vectors and do promotion.

        // TODO: Promote VP ops to fp32.

      // Custom-legalize bitcasts from fixed-length vectors to scalar types.

      if (Subtarget.hasStdExtZfbfmin())

  if (Subtarget.hasStdExtA())

  if (Subtarget.hasForcedAtomics()) {
    // Force __sync libcalls to be emitted for atomic rmw/cas operations.

  if (Subtarget.hasVendorXTHeadMemIdx()) {

  if (Subtarget.hasVendorXCVmem() && !Subtarget.is64Bit()) {

  // Function alignments.

  // Set preferred alignments.

  if (Subtarget.hasStdExtZbb())

  if ((Subtarget.hasStdExtZbs() && Subtarget.is64Bit()) ||

  if (Subtarget.hasStdExtZbkb())

                       ISD::VP_STORE, ISD::EXPERIMENTAL_VP_REVERSE,

  if (Subtarget.hasVendorXTHeadMemPair())
  // Disable strict node mutation.

  // Let the subtarget decide if a predictable select is more expensive than the
  // corresponding branch. This information is used in CGP/SelectOpt to decide
  // when to convert selects into branches.

MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {

// Return false if we can lower get_vector_length to a vsetvli intrinsic.
bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
                                                      bool IsScalable) const {

  if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())

  // Don't allow VF=1 if those types aren't legal.

  // VLEN=32 support is incomplete.

  // The maximum VF is for the smallest element width with LMUL=8.
  // VF must be a power of 2.

                                                 unsigned Intrinsic) const {
  auto &DL = I.getDataLayout();

  auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore,
                                 bool IsUnitStrided, bool UsePtrVal = false) {
    // We can't use ptrVal if the intrinsic can access memory before the
    // pointer. This means we can't use it for strided or indexed intrinsics.
      Info.ptrVal = I.getArgOperand(PtrOp);
      Info.fallbackAddressSpace =
          I.getArgOperand(PtrOp)->getType()->getPointerAddressSpace();
      // Store value is the first operand.
      MemTy = I.getArgOperand(0)->getType();
      // Use return type. If it's segment load, return type is a struct.

      // RISC-V vector tuple type's alignment type should be its element type.
      if (cast<TargetExtType>(MemTy)->getName() == "riscv.vector.tuple")
            1 << cast<ConstantInt>(I.getArgOperand(I.arg_size() - 1))
    Info.align = DL.getABITypeAlign(MemTy);

  if (I.hasMetadata(LLVMContext::MD_nontemporal))
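  // Clarifying note (added; not in the original source): in the dispatch
  // below, UsePtrVal is only passed as true where the pointer operand is the
  // lowest address accessed (e.g. riscv_seg*_load/store and the riscv_vlseg*
  // intrinsics); the strided (riscv_vlse/vlsseg*) and indexed
  // (riscv_vloxei/vluxei and their segment forms) variants leave it at the
  // default false, matching the comment inside SetRVVLoadStoreInfo above.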
  case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
  case Intrinsic::riscv_masked_atomicrmw_add_i32:
  case Intrinsic::riscv_masked_atomicrmw_sub_i32:
  case Intrinsic::riscv_masked_atomicrmw_nand_i32:
  case Intrinsic::riscv_masked_atomicrmw_max_i32:
  case Intrinsic::riscv_masked_atomicrmw_min_i32:
  case Intrinsic::riscv_masked_atomicrmw_umax_i32:
  case Intrinsic::riscv_masked_atomicrmw_umin_i32:
  case Intrinsic::riscv_masked_cmpxchg_i32:
    Info.memVT = MVT::i32;
    Info.ptrVal = I.getArgOperand(0);
  case Intrinsic::riscv_seg2_load:
  case Intrinsic::riscv_seg3_load:
  case Intrinsic::riscv_seg4_load:
  case Intrinsic::riscv_seg5_load:
  case Intrinsic::riscv_seg6_load:
  case Intrinsic::riscv_seg7_load:
  case Intrinsic::riscv_seg8_load:
    return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
                               /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
  case Intrinsic::riscv_seg2_store:
  case Intrinsic::riscv_seg3_store:
  case Intrinsic::riscv_seg4_store:
  case Intrinsic::riscv_seg5_store:
  case Intrinsic::riscv_seg6_store:
  case Intrinsic::riscv_seg7_store:
  case Intrinsic::riscv_seg8_store:
    // Operands are (vec, ..., vec, ptr, vl)
    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
                               /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
  case Intrinsic::riscv_vle:
  case Intrinsic::riscv_vle_mask:
  case Intrinsic::riscv_vleff:
  case Intrinsic::riscv_vleff_mask:
    return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
                               /*IsUnitStrided*/ true,
  case Intrinsic::riscv_vse:
  case Intrinsic::riscv_vse_mask:
    return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
                               /*IsUnitStrided*/ true,
  case Intrinsic::riscv_vlse:
  case Intrinsic::riscv_vlse_mask:
  case Intrinsic::riscv_vloxei:
  case Intrinsic::riscv_vloxei_mask:
  case Intrinsic::riscv_vluxei:
  case Intrinsic::riscv_vluxei_mask:
    return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_vsse:
  case Intrinsic::riscv_vsse_mask:
  case Intrinsic::riscv_vsoxei:
  case Intrinsic::riscv_vsoxei_mask:
  case Intrinsic::riscv_vsuxei:
  case Intrinsic::riscv_vsuxei_mask:
    return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_vlseg2:
  case Intrinsic::riscv_vlseg3:
  case Intrinsic::riscv_vlseg4:
  case Intrinsic::riscv_vlseg5:
  case Intrinsic::riscv_vlseg6:
  case Intrinsic::riscv_vlseg7:
  case Intrinsic::riscv_vlseg8:
  case Intrinsic::riscv_vlseg2ff:
  case Intrinsic::riscv_vlseg3ff:
  case Intrinsic::riscv_vlseg4ff:
  case Intrinsic::riscv_vlseg5ff:
  case Intrinsic::riscv_vlseg6ff:
  case Intrinsic::riscv_vlseg7ff:
  case Intrinsic::riscv_vlseg8ff:
    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
                               /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
  case Intrinsic::riscv_vlseg2_mask:
  case Intrinsic::riscv_vlseg3_mask:
  case Intrinsic::riscv_vlseg4_mask:
  case Intrinsic::riscv_vlseg5_mask:
  case Intrinsic::riscv_vlseg6_mask:
  case Intrinsic::riscv_vlseg7_mask:
  case Intrinsic::riscv_vlseg8_mask:
  case Intrinsic::riscv_vlseg2ff_mask:
  case Intrinsic::riscv_vlseg3ff_mask:
  case Intrinsic::riscv_vlseg4ff_mask:
  case Intrinsic::riscv_vlseg5ff_mask:
  case Intrinsic::riscv_vlseg6ff_mask:
  case Intrinsic::riscv_vlseg7ff_mask:
  case Intrinsic::riscv_vlseg8ff_mask:
    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
                               /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
  case Intrinsic::riscv_vlsseg2:
  case Intrinsic::riscv_vlsseg3:
  case Intrinsic::riscv_vlsseg4:
  case Intrinsic::riscv_vlsseg5:
  case Intrinsic::riscv_vlsseg6:
  case Intrinsic::riscv_vlsseg7:
  case Intrinsic::riscv_vlsseg8:
  case Intrinsic::riscv_vloxseg2:
  case Intrinsic::riscv_vloxseg3:
  case Intrinsic::riscv_vloxseg4:
  case Intrinsic::riscv_vloxseg5:
  case Intrinsic::riscv_vloxseg6:
  case Intrinsic::riscv_vloxseg7:
  case Intrinsic::riscv_vloxseg8:
  case Intrinsic::riscv_vluxseg2:
  case Intrinsic::riscv_vluxseg3:
  case Intrinsic::riscv_vluxseg4:
  case Intrinsic::riscv_vluxseg5:
  case Intrinsic::riscv_vluxseg6:
  case Intrinsic::riscv_vluxseg7:
  case Intrinsic::riscv_vluxseg8:
    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_vlsseg2_mask:
  case Intrinsic::riscv_vlsseg3_mask:
  case Intrinsic::riscv_vlsseg4_mask:
  case Intrinsic::riscv_vlsseg5_mask:
  case Intrinsic::riscv_vlsseg6_mask:
  case Intrinsic::riscv_vlsseg7_mask:
  case Intrinsic::riscv_vlsseg8_mask:
  case Intrinsic::riscv_vloxseg2_mask:
  case Intrinsic::riscv_vloxseg3_mask:
  case Intrinsic::riscv_vloxseg4_mask:
  case Intrinsic::riscv_vloxseg5_mask:
  case Intrinsic::riscv_vloxseg6_mask:
  case Intrinsic::riscv_vloxseg7_mask:
  case Intrinsic::riscv_vloxseg8_mask:
  case Intrinsic::riscv_vluxseg2_mask:
  case Intrinsic::riscv_vluxseg3_mask:
  case Intrinsic::riscv_vluxseg4_mask:
  case Intrinsic::riscv_vluxseg5_mask:
  case Intrinsic::riscv_vluxseg6_mask:
  case Intrinsic::riscv_vluxseg7_mask:
  case Intrinsic::riscv_vluxseg8_mask:
    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 6,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_vsseg2:
  case Intrinsic::riscv_vsseg3:
  case Intrinsic::riscv_vsseg4:
  case Intrinsic::riscv_vsseg5:
  case Intrinsic::riscv_vsseg6:
  case Intrinsic::riscv_vsseg7:
  case Intrinsic::riscv_vsseg8:
    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_vsseg2_mask:
  case Intrinsic::riscv_vsseg3_mask:
  case Intrinsic::riscv_vsseg4_mask:
  case Intrinsic::riscv_vsseg5_mask:
  case Intrinsic::riscv_vsseg6_mask:
  case Intrinsic::riscv_vsseg7_mask:
  case Intrinsic::riscv_vsseg8_mask:
    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_vssseg2:
  case Intrinsic::riscv_vssseg3:
  case Intrinsic::riscv_vssseg4:
  case Intrinsic::riscv_vssseg5:
  case Intrinsic::riscv_vssseg6:
  case Intrinsic::riscv_vssseg7:
  case Intrinsic::riscv_vssseg8:
  case Intrinsic::riscv_vsoxseg2:
  case Intrinsic::riscv_vsoxseg3:
  case Intrinsic::riscv_vsoxseg4:
  case Intrinsic::riscv_vsoxseg5:
  case Intrinsic::riscv_vsoxseg6:
  case Intrinsic::riscv_vsoxseg7:
  case Intrinsic::riscv_vsoxseg8:
  case Intrinsic::riscv_vsuxseg2:
  case Intrinsic::riscv_vsuxseg3:
  case Intrinsic::riscv_vsuxseg4:
  case Intrinsic::riscv_vsuxseg5:
  case Intrinsic::riscv_vsuxseg6:
  case Intrinsic::riscv_vsuxseg7:
  case Intrinsic::riscv_vsuxseg8:
    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
                               /*IsUnitStrided*/ false);
  case Intrinsic::riscv_vssseg2_mask:
  case Intrinsic::riscv_vssseg3_mask:
  case Intrinsic::riscv_vssseg4_mask:
  case Intrinsic::riscv_vssseg5_mask:
  case Intrinsic::riscv_vssseg6_mask:
  case Intrinsic::riscv_vssseg7_mask:
  case Intrinsic::riscv_vssseg8_mask:
  case Intrinsic::riscv_vsoxseg2_mask:
  case Intrinsic::riscv_vsoxseg3_mask:
  case Intrinsic::riscv_vsoxseg4_mask:
  case Intrinsic::riscv_vsoxseg5_mask:
  case Intrinsic::riscv_vsoxseg6_mask:
  case Intrinsic::riscv_vsoxseg7_mask:
  case Intrinsic::riscv_vsoxseg8_mask:
  case Intrinsic::riscv_vsuxseg2_mask:
  case Intrinsic::riscv_vsuxseg3_mask:
  case Intrinsic::riscv_vsuxseg4_mask:
  case Intrinsic::riscv_vsuxseg5_mask:
  case Intrinsic::riscv_vsuxseg6_mask:
  case Intrinsic::riscv_vsuxseg7_mask:
  case Intrinsic::riscv_vsuxseg8_mask:
    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
                               /*IsUnitStrided*/ false);
  // No global is ever allowed as a base.

  // None of our addressing modes allows a scalable offset

  // RVV instructions only support register addressing.

  // Require a 12-bit signed offset.

  case 0: // "r+i" or just "i", depending on HasBaseReg.
    return false; // disallow "r+r" or "r+r+i".

  return isInt<12>(Imm);

  return isInt<12>(Imm);

// On RV32, 64-bit integers are split into their high and low parts and held
// in two different registers, so the trunc is free since the low register can
// FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
  return (SrcBits == 64 && DestBits == 32);

// We consider i64->i32 free on RV64 since we have good selection of W
// instructions that make promoting operations back to i64 free in many cases.
  return (SrcBits == 64 && DestBits == 32);

  // free truncate from vnsrl and vnsra
  if (SrcBits == DestBits * 2) {
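    // Illustrative note (added; not in the original source): this branch
    // covers truncates where the source element is exactly twice as wide as
    // the destination, e.g. a vector i64 -> i32 truncate, which RVV can
    // perform with a single narrowing shift (vnsrl/vnsra) and is therefore
    // treated as free here.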
// Zexts are free if they can be combined with a load.
// Don't advertise i32->i64 zextload as being free for RV64. It interacts
// poorly with type legalization of compares preferring sext.
  if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
    EVT MemVT = LD->getMemoryVT();
    if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&

  return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;

  return Subtarget.hasStdExtZbb() ||
         (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit());

  return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
         (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit());

  // We expect to be able to match a bit extraction instruction if the Zbs
  // extension is supported and the mask is a power of two. However, we
  // conservatively return false if the mask would fit in an ANDI instruction,
  // on the basis that it's possible the sinking+duplication of the AND in
  // CodeGenPrepare triggered by this hook wouldn't decrease the instruction
  // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
  if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs())

  return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();
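  // Worked example (added; not in the original source): a mask of 1 << 10
  // (0x400) still fits a signed 12-bit ANDI immediate, so the check above
  // returns false and the plain AND is kept; a power-of-two mask of 1 << 11
  // (0x800) no longer fits, so it returns true and a bit-extract is preferred.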
  EVT VT = Y.getValueType();

  // FIXME: Support vectors once we have tests.

  return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
         (!isa<ConstantSDNode>(Y) || cast<ConstantSDNode>(Y)->isOpaque());

  // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
  if (Subtarget.hasStdExtZbs())
    return X.getValueType().isScalarInteger();
  auto *C = dyn_cast<ConstantSDNode>(Y);
  // XTheadBs provides th.tst (similar to bexti), if Y is a constant
  if (Subtarget.hasVendorXTHeadBs())
  // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
  return C && C->getAPIntValue().ule(10);

  // Only enable for rvv.

  if (BitSize > Subtarget.getXLen())

  // Fast path, assume 32-bit immediates are cheap.
  int64_t Val = Imm.getSExtValue();

  // A constant pool entry may be more aligned than the load we're trying to
  // replace. If we don't support unaligned scalar mem, prefer the constant
  // TODO: Can the caller pass down the alignment?
  if (!Subtarget.enableUnalignedScalarMem())

  // Prefer to keep the load if it would require many instructions.
  // This uses the same threshold we use for constant pools but doesn't
  // check useConstantPoolForLargeInts.
  // TODO: Should we keep the load only when we're definitely going to emit a

                                               unsigned OldShiftOpcode,
                                               unsigned NewShiftOpcode,
  // One interesting pattern that we'd want to form is 'bit extract':
  //   ((1 >> Y) & 1) ==/!= 0
  // But we also need to be careful not to try to reverse that fold.

  // Is this '((1 >> Y) & 1)'?
  if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
    return false; // Keep the 'bit extract' pattern.

  // Will this be '((1 >> Y) & 1)' after the transform?
    return true; // Do form the 'bit extract' pattern.

  // If 'X' is a constant, and we transform, then we will immediately
  // try to undo the fold, thus causing endless combine loop.
  // So only do the transform if X is not a constant. This matches the default
  // implementation of this function.

  // Assume target opcodes can't be scalarized.
  // TODO - do we have any exceptions?

  // If the vector op is not supported, try to convert to scalar.

  // If the vector op is supported, but the scalar op is not, the transform may
  // not be worthwhile.
  // Permit a vector binary operation to be converted to a scalar binary
  // operation which is custom lowered with an illegal type.

// In order to maximise the opportunity for common subexpression elimination,
// keep a separate ADD node for the global address offset instead of folding
// it in the global address node. Later peephole optimisations may choose to
// fold it back in when profitable.

// Returns 0-31 if the fli instruction is available for the type and this is
// legal FP immediate for the type. Returns -1 otherwise.
  if (!Subtarget.hasStdExtZfa())
  bool IsSupportedVT = false;
  if (VT == MVT::f16) {
    IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
  } else if (VT == MVT::f32) {
    IsSupportedVT = true;
  } else if (VT == MVT::f64) {
    assert(Subtarget.hasStdExtD() && "Expect D extension");
    IsSupportedVT = true;
                                       bool ForCodeSize) const {
  bool IsLegalVT = false;
  else if (VT == MVT::f32)
  else if (VT == MVT::f64)
  else if (VT == MVT::bf16)
    IsLegalVT = Subtarget.hasStdExtZfbfmin();

  // Cannot create a 64 bit floating-point immediate value for rv32.

  // td can handle +0.0 or -0.0 already.
  // -0.0 can be created by fmv + fneg.

  // Special case: fmv + fneg

  // Building an integer and then converting requires a fmv at the end of
  // the integer sequence. The fmv is not required for Zfinx.
  const int FmvCost = Subtarget.hasStdExtZfinx() ? 0 : 1;
                                     Subtarget.getXLen(), Subtarget);

// TODO: This is very conservative.
                                                  unsigned Index) const {
  // Only support extracting a fixed from a fixed vector for now.

  // The smallest type we can slide is i8.
  // TODO: We can extract index 0 from a mask vector without a slide.
  if (EltVT == MVT::i1)

  // If we're extracting only data from the first VLEN bits of the source
  // then we can always do this with an m1 vslidedown.vx. Restricting the
  // Index ensures we can use a vslidedown.vi.
  // TODO: We can generalize this when the exact VLEN is known.
  if (Index + ResElts <= MinVLMAX && Index < 31)

  // Conservatively only handle extracting half of a vector.
  // TODO: We can do arbitrary slidedowns, but for now only support extracting
  // the upper half of a vector until we have more test coverage.
  // TODO: For sizes which aren't multiples of VLEN sizes, this may not be
  // a cheap extract. However, this case is important in practice for
  // shuffled extracts of longer vectors. How should we resolve this?
  return (ResElts * 2) == SrcElts && (Index == 0 || Index == ResElts);
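// Worked example (added; not in the original source): assuming the earlier
// VLEN-based fast path does not apply, extracting a v4i32 subvector from a
// v8i32 source is reported as cheap only when it starts at element 0 (the low
// half) or element 4 (the high half); any other index is not considered cheap
// by this check.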
// Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
// We might still end up using a GPR but that will be decided based on ABI.

                                           std::optional<MVT> RegisterVT) const {
  // Pair inline assembly operand
  if (VT == (Subtarget.is64Bit() ? MVT::i128 : MVT::i64) && RegisterVT &&
      *RegisterVT == MVT::Untyped)

// Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
// We might still end up using a GPR but that will be decided based on ABI.

                                        unsigned &NumIntermediates,
                                        MVT &RegisterVT) const {
      Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);

// Changes the condition code and swaps operands if necessary, so the SetCC
// operation matches one of the comparisons supported directly by branches
// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare

  // If this is a single bit test that can't be handled by ANDI, shift the
  // bit to be tested to the MSB and perform a signed compare with 0.
      isa<ConstantSDNode>(LHS.getOperand(1))) {

      ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
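      // Worked example (added; not in the original source): on RV64, testing
      // bit 10 (Mask == 0x400) gives ShAmt = 64 - 1 - 10 = 53, so the test
      // becomes a left shift by 53 followed by a signed compare of the result
      // against zero, placing the tested bit in the sign position.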
  if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
    int64_t C = RHSC->getSExtValue();

    // Convert X > -1 to X >= 0.

    // Convert X < 1 to 0 >= X.

  if (VT.SimpleTy >= MVT::riscv_nxv1i8x2 &&
      VT.SimpleTy <= MVT::riscv_nxv1i8x8)
  if (VT.SimpleTy >= MVT::riscv_nxv2i8x2 &&
      VT.SimpleTy <= MVT::riscv_nxv2i8x8)
  if (VT.SimpleTy >= MVT::riscv_nxv4i8x2 &&
      VT.SimpleTy <= MVT::riscv_nxv4i8x8)
  if (VT.SimpleTy >= MVT::riscv_nxv8i8x2 &&
      VT.SimpleTy <= MVT::riscv_nxv8i8x8)
  if (VT.SimpleTy >= MVT::riscv_nxv16i8x2 &&
      VT.SimpleTy <= MVT::riscv_nxv16i8x4)
  if (VT.SimpleTy == MVT::riscv_nxv32i8x2)
    return RISCV::VRRegClassID;
    return RISCV::VRM2RegClassID;
    return RISCV::VRM4RegClassID;
    return RISCV::VRM8RegClassID;

    static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
                  "Unexpected subreg numbering");
    return RISCV::sub_vrm1_0 + Index;

    static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
                  "Unexpected subreg numbering");
    return RISCV::sub_vrm2_0 + Index;

    static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
                  "Unexpected subreg numbering");
    return RISCV::sub_vrm4_0 + Index;

  unsigned RegsPerField =

  switch (RegsPerField) {
      return RISCV::VRN2M1RegClassID;
      return RISCV::VRN3M1RegClassID;
      return RISCV::VRN4M1RegClassID;
      return RISCV::VRN5M1RegClassID;
      return RISCV::VRN6M1RegClassID;
      return RISCV::VRN7M1RegClassID;
      return RISCV::VRN8M1RegClassID;
      return RISCV::VRN2M2RegClassID;
      return RISCV::VRN3M2RegClassID;
      return RISCV::VRN4M2RegClassID;
      return RISCV::VRN2M4RegClassID;

  return RISCV::VRRegClassID;
// Attempt to decompose a subvector insert/extract between VecVT and
// SubVecVT via subregister indices. Returns the subregister index that
// can perform the subvector insert/extract with the given element index, as
// well as the index corresponding to any leftover subvectors that must be
// further inserted/extracted within the register class for SubVecVT.
std::pair<unsigned, unsigned>
                                                 MVT VecVT, MVT SubVecVT,
                                                 unsigned InsertExtractIdx,
  static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
                 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
                 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
                "Register classes not ordered");

  // If VecVT is a vector tuple type, either it's the tuple type with same
  // RegClass with SubVecVT or SubVecVT is actually a subvector of the VecVT.
    if (VecRegClassID == SubRegClassID)
      return {RISCV::NoSubRegister, 0};

           "Only allow scalable vector subvector.");
           "Invalid vector tuple insert/extract for vector and subvector with "

  // Try to compose a subregister index that takes us from the incoming
  // LMUL>1 register class down to the outgoing one. At each step we half
  //   nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
  // Note that this is not guaranteed to find a subregister index, such as
  // when we are extracting from one VR type to another.
  unsigned SubRegIdx = RISCV::NoSubRegister;
  for (const unsigned RCID :
       {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
    if (VecRegClassID > RCID && SubRegClassID <= RCID) {
      SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,

  return {SubRegIdx, InsertExtractIdx};
// Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
// stores for those types.
bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {

unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const {

         "Unexpected opcode");

  unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
      RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
  return Op.getOperand(II->VLOperand + 1 + HasChain);

// We only support a set of vector types with a consistent maximum fixed size
// across all supported vector element types to avoid legalization issues.
// Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
// fixed-length vector type we support is 1024 bytes.

  // Don't use RVV for vectors we cannot scalarize if required.

  // i1 is supported but has different rules.

  // Masks can only use a single register.

  // Reject elements larger than ELEN.

  // Don't use RVV for types that don't fit.

  // TODO: Perhaps an artificial restriction, but worth having whilst getting
  // the base fixed length RVV support in place.

bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
  return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
// Return the largest legal scalable vector type that matches VT's element type.

  // This may be called before legal types are setup.
         "Expected legal fixed length vector!");

  unsigned MaxELen = Subtarget.getELen();

  // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
  // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
  // each fractional LMUL we support SEW between 8 and LMUL*ELEN.

  return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());

// Grow V to consume an entire RVV register.
         "Expected to convert into a scalable vector!");
  assert(V.getValueType().isFixedLengthVector() &&
         "Expected a fixed length vector operand!");

// Shrink V so it's just big enough to maintain a VT's worth of data.
         "Expected to convert into a fixed length vector!");
  assert(V.getValueType().isScalableVector() &&
         "Expected a scalable vector operand!");

/// Return the mask type suitable for masking the provided vector type. This
/// is simply an i1 element type vector of the same (possibly scalable) length.

/// Creates an all ones mask suitable for masking a vector of type VecTy with
/// vector length VL.
static std::pair<SDValue, SDValue>

static std::pair<SDValue, SDValue>

// Gets the two common "VL" operands: an all-ones mask and the vector length.
// VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
// the vector type that the fixed-length vector is contained in. Otherwise if
// VecVT is scalable, then ContainerVT should be the same as VecVT.
static std::pair<SDValue, SDValue>

std::pair<unsigned, unsigned>

  return std::make_pair(MinVLMAX, MaxVLMAX);
// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
// of either is (currently) supported. This can get us into an infinite loop
// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
// Until either (or both) of these can reliably lower any node, reporting that
// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
// which is not desirable.
                                            EVT VT, unsigned DefinedValues) const {

  // TODO: Here assume reciprocal throughput is 1 for LMUL_1, it is
  // implementation-defined.
  std::tie(LMul, Fractional) =
    Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
    Cost = (LMul * DLenFactor);
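  // Illustrative note (added; not in the original source): reading the two
  // formulas above, with a DLenFactor of 2 a fractional LMUL of 1/2 costs
  // 2 / 2 = 1 while an integral LMUL of 4 costs 4 * 2 = 8; with a DLenFactor
  // of 1 an LMUL=1 operation costs 1, matching the TODO's assumption above.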
/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
/// is generally quadratic in the number of vregs implied by LMUL. Note that
/// operand (index and possibly mask) are handled separately.

/// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
/// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
/// or may track the vrgather.vv cost. It is implementation-dependent.

/// Return the cost of a vslidedown.vx or vslideup.vx instruction
/// for the type VT. (This does not cover the vslide1up or vslide1down
/// variants.) Slides may be linear in the number of vregs implied by LMUL,
/// or may track the vrgather.vv cost. It is implementation-dependent.

/// Return the cost of a vslidedown.vi or vslideup.vi instruction
/// for the type VT. (This does not cover the vslide1up or vslide1down
/// variants.) Slides may be linear in the number of vregs implied by LMUL,
/// or may track the vrgather.vv cost. It is implementation-dependent.

  // f16 conversions are promoted to f32 when Zfh/Zhinx are not supported.
  // bf16 conversions are always promoted to f32.
      Op.getValueType() == MVT::bf16) {
    bool IsStrict = Op->isStrictFPOpcode();
                        {Op.getOperand(0), Op.getOperand(1)});
                        {Op.getValueType(), MVT::Other},

  // Other operations are legal.

// RISC-V FP-to-int conversions saturate to the destination register size, but
// don't produce 0 for nan. We can use a conversion instruction and fix the
// nan case with a compare and a select.
  MVT DstVT = Op.getSimpleValueType();
  EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();

  // For bf16 or for f16 in absence of Zfh, promote to f32, then saturate
      Src.getValueType() == MVT::bf16) {

  else if (DstVT == MVT::i64 && SatVT == MVT::i32)

  // FIXME: Support other SatVTs by clamping before or after the conversion.

  MVT SrcVT = Src.getSimpleValueType();

  // Only handle saturating to the destination type.
  if (SatVT != DstEltVT)

  MVT DstContainerVT = DstVT;
  MVT SrcContainerVT = SrcVT;

         "Expected same element count");

                     {Src, Src, DAG.getCondCode(ISD::SETNE),
                      DAG.getUNDEF(Mask.getValueType()), Mask, VL});
  // Need to widen by more than 1 step, promote the FP type, then do a widening
  if (DstEltSize > (2 * SrcEltSize)) {

  MVT CvtContainerVT = DstContainerVT;
  MVT CvtEltVT = DstEltVT;
  if (SrcEltSize > (2 * DstEltSize)) {

  while (CvtContainerVT != DstContainerVT) {
    // Rounding mode here is arbitrary since we aren't shifting out any bits.
    Res = DAG.getNode(ClipOpc, DL, CvtContainerVT, Res, Mask, VL);

                    Res, DAG.getUNDEF(DstContainerVT), VL);

  bool IsStrict = Op->isStrictFPOpcode();
  SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);

  // f16 conversions are promoted to f32 when Zfh/Zhinx is not enabled.
  // bf16 conversions are always promoted to f32.
                          {Op.getOperand(0), SrcVal});
    return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
                       {Ext.getValue(1), Ext.getValue(0)});

  // Other operations are legal.

  case ISD::VP_FROUNDEVEN:
  case ISD::VP_FROUNDTOZERO:
// Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND
// VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
// the integer domain and back. Taking care to avoid converting values that are
// nan or already correct.
  MVT VT = Op.getSimpleValueType();

  MVT ContainerVT = VT;

  if (Op->isVPOpcode()) {
    Mask = Op.getOperand(1);
    VL = Op.getOperand(2);

  // Freeze the source since we are increasing the number of uses.

  // We do the conversion on the absolute value and fix the sign at the end.

  // Determine the largest integer that can be represented exactly. This and
  // values larger than it don't have any fractional bits so don't need to
                               DAG.getUNDEF(ContainerVT), MaxValNode, VL);

  // If abs(Src) was larger than MaxVal or nan, keep it.

  // Truncate to integer and convert back to FP.

  switch (Op.getOpcode()) {
  case ISD::VP_FROUNDEVEN:
  case ISD::VP_FROUNDTOZERO: {
  case ISD::VP_FNEARBYINT:

  // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.

  // Restore the original sign so that -0.0 is preserved.
                    Src, Src, Mask, VL);
// Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND
// STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting sNan of the source to
// qNan and converting the new source to integer and back to FP.
  MVT VT = Op.getSimpleValueType();

  MVT ContainerVT = VT;

  // Freeze the source since we are increasing the number of uses.

  // Convert sNan to qNan by executing x + x for all unordered elements x in Src.
  MVT MaskVT = Mask.getSimpleValueType();
                        {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE),
                         DAG.getUNDEF(MaskVT), Mask, VL});
                    {Chain, Src, Src, Src, Unorder, VL});
  Chain = Src.getValue(1);

  // We do the conversion on the absolute value and fix the sign at the end.

  // Determine the largest integer that can be represented exactly. This and
                               DAG.getUNDEF(ContainerVT), MaxValNode, VL);

  // If abs(Src) was larger than MaxVal or nan, keep it.

  // Truncate to integer and convert back to FP.
  switch (Op.getOpcode()) {
        {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL});
                            DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL);
                            DAG.getVTList(ContainerVT, MVT::Other), Chain, Src,

    // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
                            DAG.getVTList(ContainerVT, MVT::Other), Chain,
                            Truncated, Mask, VL);

  // Restore the original sign so that -0.0 is preserved.
                    Src, Src, Mask, VL);
  MVT VT = Op.getSimpleValueType();

  // Create an integer the size of the mantissa with the MSB set. This and all
  // values larger than it don't have any fractional bits so don't need to be

// Expand vector LRINT and LLRINT by converting to the integer domain.
  MVT VT = Op.getSimpleValueType();

  MVT ContainerVT = VT;

         "Unexpected vector MVT");

  // We will use a SINT_TO_FP to materialize this constant so we should use a
  // signed APSInt here.
  // We use an arbitrary rounding mode here. If a floating-point is an exact
  // integer (e.g., 1.0), the rounding mode does not affect the output value. If
  // the rounding mode changes the output value, then it is not an exact
  // integer.

  // If it is out of signed integer range, it will return an invalid operation.
  // If it is not an exact integer, IsExact is false.

// Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
// to the (non-zero) step S and start value X. This can then be lowered as the
// RVV sequence (VID * S) + X, for example.
// The step S is represented as an integer numerator divided by a positive
// denominator. Note that the implementation currently only identifies
// sequences in which either the numerator is +/- 1 or the denominator is 1. It
// cannot detect 2/3, for example.
// Note that this method will also match potentially unappealing index
// sequences, like <i32 0, i32 50939494>, however it is left to the caller to
// determine whether this is worth generating code for.
//
// EltSizeInBits is the size of the type that the sequence will be calculated
// in, i.e. SEW for build_vectors or XLEN for address calculations.
                                                      unsigned EltSizeInBits) {
  bool IsInteger = Op.getValueType().isInteger();

  std::optional<unsigned> SeqStepDenom;
  std::optional<APInt> SeqStepNum;
  std::optional<APInt> SeqAddend;
  std::optional<std::pair<APInt, unsigned>> PrevElt;
  assert(EltSizeInBits >= Op.getValueType().getScalarSizeInBits());

  // First extract the ops into a list of constant integer values. This may not
  // be possible for floats if they're not all representable as integers.
  const unsigned OpSize = Op.getScalarValueSizeInBits();
      Elts[Idx] = std::nullopt;
      Elts[Idx] = Elt->getAsAPIntVal().trunc(OpSize).zext(EltSizeInBits);
      Elts[Idx] = *ExactInteger;

    // Assume undef elements match the sequence; we just have to be careful
    // when interpolating across them.

      // Calculate the step since the last non-undef element, and ensure
      // it's consistent across the entire sequence.
      unsigned IdxDiff = Idx - PrevElt->second;
      APInt ValDiff = *Elt - PrevElt->first;

      // A zero-value value difference means that we're somewhere in the middle
      // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
      // step change before evaluating the sequence.

      int64_t Remainder = ValDiff.srem(IdxDiff);
      // Normalize the step if it's greater than 1.
        // The difference must cleanly divide the element span.
        ValDiff = ValDiff.sdiv(IdxDiff);

        SeqStepNum = ValDiff;
      else if (ValDiff != SeqStepNum)

        SeqStepDenom = IdxDiff;
      else if (IdxDiff != *SeqStepDenom)

    // Record this non-undef element for later.
    if (!PrevElt || PrevElt->first != *Elt)
      PrevElt = std::make_pair(*Elt, Idx);
  // We need to have logged a step for this to count as a legal index sequence.
  if (!SeqStepNum || !SeqStepDenom)

  // Loop back through the sequence and validate elements we might have skipped
  // while waiting for a valid step. While doing this, log any sequence addend.
        (APInt(EltSizeInBits, Idx,
               /*isSigned=*/false, /*implicitTrunc=*/true) *
            .sdiv(*SeqStepDenom);
    APInt Addend = *Elt - ExpectedVal;
    else if (Addend != SeqAddend)

  assert(SeqAddend && "Must have an addend if we have a step");

  return VIDSequence{SeqStepNum->getSExtValue(), *SeqStepDenom,
                     SeqAddend->getSExtValue()};
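// Worked example (added; not in the original source): for the build_vector
// <i32 1, i32 3, i32 5, i32 7>, the matcher above yields StepNumerator = 2,
// StepDenominator = 1 and Addend = 1, which a caller can lower as the RVV
// sequence (VID * 2) + 1, as described in the function comment.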
// Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
// and lower it as a VRGATHER_VX_VL from the source vector.

  // Don't perform this optimization for i1 vectors, or if the element types are
  // FIXME: Support i1 vectors, maybe by promoting to i8?
  MVT SrcVT = Src.getSimpleValueType();

  // The index must be a legal type.

  // Check that we know Idx lies within VT
  auto *CIdx = dyn_cast<ConstantSDNode>(Idx);

  // Convert fixed length vectors to scalable
  MVT ContainerVT = VT;
  MVT SrcContainerVT = SrcVT;

  // Put Vec in a VT sized vector

  // We checked that Idx fits inside VT earlier

/// Try and optimize BUILD_VECTORs with "dominant values" - these are values
/// which constitute a large proportion of the elements. In such cases we can
/// splat a vector with the dominant element and make up the shortfall with
/// INSERT_VECTOR_ELTs. Returns SDValue if not profitable.
/// Note that this includes vectors of 2 elements by association. The
/// upper-most element is the "dominant" one, allowing us to use a splat to
/// "insert" the upper element, and an insert of the lower element at position
/// 0, which improves codegen.
  MVT VT = Op.getSimpleValueType();

  unsigned MostCommonCount = 0;

  unsigned NumUndefElts =

  // Track the number of scalar loads we know we'd be inserting, estimated as
  // any non-zero floating-point constant. Other kinds of element are either
  // already in registers or are materialized on demand. The threshold at which
  // a vector load is more desirable than several scalar materialization and
  // vector-insertion instructions is not known.
  unsigned NumScalarLoads = 0;

    unsigned &Count = ValueCounts[V];
    if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
      NumScalarLoads += !CFP->isExactlyValue(+0.0);

    // Is this value dominant? In case of a tie, prefer the highest element as
    // it's cheaper to insert near the beginning of a vector than it is at the
    if (++Count >= MostCommonCount) {
      MostCommonCount = Count;

  assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
  unsigned NumDefElts = NumElts - NumUndefElts;
  unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;

  // Don't perform this optimization when optimizing for size, since
  // materializing elements and inserting them tends to cause code bloat.
      ((MostCommonCount > DominantValueCountThreshold) ||

    // Start by splatting the most common element.

    // We can handle an insert into the last element (of a splat) via
    // v(f)slide1down. This is slightly better than the vslideup insert
    // lowering as it avoids the need for a vector group temporary. It
    // is also better than using vmerge.vx as it avoids the need to
    // materialize the mask in a vector register.
        !LastOp.isUndef() && ValueCounts[LastOp] == 1 &&
        LastOp != DominantValue) {

      Processed.insert(LastOp);

      const SDValue &V = OpIdx.value();
      if (V.isUndef() || !Processed.insert(V).second)
      if (ValueCounts[V] == 1) {

        // Blend in all instances of this value using a VSELECT, using a
        // mask where each bit signals whether that element is the one
              return DAG.getConstant(V == V1, DL, XLenVT);
  MVT VT = Op.getSimpleValueType();

    // Lower constant mask BUILD_VECTORs via an integer vector type, in
    // scalar integer chunks whose bit-width depends on the number of mask
    // First, determine the most appropriate scalar integer type to use. This
    // is at most XLenVT, but may be shrunk to a smaller vector element type
    // according to the size of the final vector - use i8 chunks rather than
    // XLenVT if we're producing a v8i1. This results in more consistent
    // codegen across RV32 and RV64.
    unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
    NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen());
    // If we have to use more than one INSERT_VECTOR_ELT then this
    // optimization is likely to increase code size; avoid performing it in
    // such a case. We can use a load from a constant pool in this case.

      // Now we can create our integer vector type. Note that it may be larger
      // than the resulting mask type: v4i1 would use v1i8 as its integer type.
      unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
      MVT IntegerViaVecVT =

      unsigned BitPos = 0, IntegerEltIdx = 0;

      for (unsigned I = 0; I < NumElts;) {
        bool BitValue = !V.isUndef() && V->getAsZExtVal();
        Bits |= ((uint64_t)BitValue << BitPos);

        // Once we accumulate enough bits to fill our scalar type or process the
        // last element, insert into our vector and clear our accumulated data.
        if (I % NumViaIntegerBits == 0 || I == NumElts) {
          if (NumViaIntegerBits <= 32)
            Bits = SignExtend64<32>(Bits);
          Elts[IntegerEltIdx] = Elt;

      if (NumElts < NumViaIntegerBits) {
        // If we're producing a smaller vector than our minimum legal integer
        // type, bitcast to the equivalent (known-legal) mask type, and extract
        assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");

      // Else we must have produced an integer type with the same size as the
      // mask type; bitcast for the final result.

  // Try and match index sequences, which we can lower to the vid instruction
  // with optional modifications. An all-undef vector is matched by
  // getSplatValue, above.
    int64_t StepNumerator = SimpleVID->StepNumerator;
3792unsigned StepDenominator = SimpleVID->StepDenominator;
3793 int64_t Addend = SimpleVID->Addend;
3795assert(StepNumerator != 0 &&
"Invalid step");
3797 int64_t SplatStepVal = StepNumerator;
3799// Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it 3800// anyway as the shift of 63 won't fit in uimm5. 3801if (StepNumerator != 1 && StepNumerator !=
INT64_MIN &&
3803 Negate = StepNumerator < 0;
3805 SplatStepVal =
Log2_64(std::abs(StepNumerator));
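// Illustrative example (hypothetical constants): the index sequence
// <1, 3, 5, 7> is matched with StepNumerator = 2, StepDenominator = 1 and
// Addend = 1, i.e. element i is (i * 2) / 1 + 1; because 2 is a power of
// two, the multiply is expressed as a shift with SplatStepVal = 1.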
// Only emit VIDs with suitably-small steps/addends. We use imm5 as a
// threshold since it's the immediate value many RVV instructions accept.
// There is no vmul.vi instruction so ensure the multiply constant can fit in
// a single addi instruction.
if (((StepOpcode ==
ISD::MUL && isInt<12>(SplatStepVal)) ||
3813 (StepOpcode ==
ISD::SHL && isUInt<5>(SplatStepVal))) &&
3815 (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
3821// Convert right out of the scalable type so we can use standard ISD 3822// nodes for the rest of the computation. If we used scalable types with 3823// these, we'd lose the fixed-length vector info and generate worse 3826if ((StepOpcode ==
ISD::MUL && SplatStepVal != 1) ||
3827 (StepOpcode ==
ISD::SHL && SplatStepVal != 0)) {
3829 VID = DAG.
getNode(StepOpcode,
DL, VIDVT, VID, SplatStep);
3831if (StepDenominator != 1) {
3836if (Addend != 0 || Negate) {
3842// TODO: Use vfwcvt to reduce register pressure. 3849// For very small build_vectors, use a single scalar insert of a constant. 3850// TODO: Base this on constant rematerialization cost, not size. 3855assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) &&
3856"Unexpected sequence type");
3857// If we can use the original VL with the modified element type, this 3858// means we only have a VTYPE toggle, not a VL toggle. TODO: Should this 3859// be moved into InsertVSETVLI? 3864uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3866// Construct the amalgamated value at this larger vector type. 3867for (
constauto &OpIdx :
enumerate(
Op->op_values())) {
3868constauto &SeqV = OpIdx.value();
3871 ((SeqV->getAsZExtVal() & EltMask) << (OpIdx.index() * EltBitSize));
// On RV64, sign-extend from 32 to 64 bits where possible in order to
// achieve better constant materialization.
// On RV32, we need to sign-extend to use getSignedConstant.
if (ViaIntVT == MVT::i32)
3878 SplatValue = SignExtend64<32>(SplatValue);
3892// Attempt to detect "hidden" splats, which only reveal themselves as splats 3893// when re-interpreted as a vector with a larger element type. For example, 3894// v4i16 = build_vector i16 0, i16 1, i16 0, i16 1 3895// could be instead splat as 3896// v2i32 = build_vector i32 0x00010000, i32 0x00010000 3897// TODO: This optimization could also work on non-constant splats, but it 3898// would require bit-manipulation instructions to construct the splat value. 3900constauto *BV = cast<BuildVectorSDNode>(
Op);
3903 BV->getRepeatedSequence(Sequence) &&
3904 (Sequence.size() * EltBitSize) <= Subtarget.
getELen()) {
3905unsigned SeqLen = Sequence.size();
3907assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
3908 ViaIntVT == MVT::i64) &&
3909"Unexpected sequence type");
3911// If we can use the original VL with the modified element type, this 3912// means we only have a VTYPE toggle, not a VL toggle. TODO: Should this 3913// be moved into InsertVSETVLI? 3914constunsigned RequiredVL = NumElts / SeqLen;
3915constunsigned ViaVecLen =
3917 NumElts : RequiredVL;
3921uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3923// Construct the amalgamated value which can be splatted as this larger 3925for (
constauto &SeqV : Sequence) {
3928 ((SeqV->getAsZExtVal() & EltMask) << (EltIdx * EltBitSize));
// On RV64, sign-extend from 32 to 64 bits where possible in order to
// achieve better constant materialization.
// On RV32, we need to sign-extend to use getSignedConstant.
if (ViaIntVT == MVT::i32)
3936 SplatValue = SignExtend64<32>(SplatValue);
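// Illustrative sketch (scalar form of the amalgamation above, assumed
// values): the repeated v4i16 sequence {0, 1} becomes the i32 splat value
// 0x00010000 from the earlier example:
//   uint16_t Seq[2] = {0, 1};
//   uint64_t Splat = 0;
//   for (unsigned EltIdx = 0; EltIdx < 2; ++EltIdx)
//     Splat |= (uint64_t)Seq[EltIdx] << (EltIdx * 16); // 0x00010000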
3938// Since we can't introduce illegal i64 types at this stage, we can only 3939// perform an i64 splat on RV32 if it is its own sign-extended value. That 3940// way we can use RVV instructions to splat. 3942 (!Subtarget.
is64Bit() && ViaIntVT == MVT::i64)) &&
3943"Unexpected bitcast sequence");
3944if (ViaIntVT.
bitsLE(XLenVT) || isInt<32>(SplatValue)) {
3954if (ViaVecLen != RequiredVL)
3962// If the number of signbits allows, see if we can lower as a <N x i8>. 3963// Our main goal here is to reduce LMUL (and thus work) required to 3964// build the constant, but we will also narrow if the resulting 3965// narrow vector is known to materialize cheaply. 3966// TODO: We really should be costing the smaller vector. There are 3967// profitable cases this misses. 3974 Source, DAG, Subtarget);
3982// For constant vectors, use generic constant pool lowering. Otherwise, 3983// we'd have to materialize constants in GPRs just to move them into the 3996return Subtarget.
is64Bit() ? RISCV::PACKW : RISCV::PACK;
4003/// Double the element size of the build vector to reduce the number 4004/// of vslide1down in the build vector chain. In the worst case, this 4005/// trades three scalar operations for 1 vector operation. Scalar 4006/// operations are generally lower latency, and for out-of-order cores 4007/// we also benefit from additional parallelism. 4011MVT VT =
Op.getSimpleValueType();
4017// TODO: Relax these architectural restrictions, possibly with costing 4018// of the actual instructions required. 4019if (!Subtarget.hasStdExtZbb() || !Subtarget.hasStdExtZba())
4024if (ElemSizeInBits >= std::min(Subtarget.
getELen(), Subtarget.
getXLen()) ||
4028// Produce [B,A] packed into a type twice as wide. Note that all 4029// scalars are XLenVT, possibly masked (see below). 4034// Bias the scheduling of the inserted operations to near the 4035// definition of the element - this tends to reduce register 4038if (Subtarget.hasStdExtZbkb())
4039// Note that we're relying on the high bits of the result being 4040// don't care. For PACKW, the result is *sign* extended. 4043 ElemDL, XLenVT,
A,
B),
4055 NewOperands.
reserve(NumElts / 2);
4057 NewOperands.
push_back(pack(
Op.getOperand(i),
Op.getOperand(i + 1)));
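// Illustrative sketch (plain integer arithmetic, not the DAG form): packing
// two adjacent 16-bit elements A and B into one 32-bit element, with A in
// the low half, mirrors what the pack helper above produces:
//   uint32_t Packed = (uint32_t)(uint16_t)A | ((uint32_t)(uint16_t)B << 16);
// With Zbkb this is a single pack/packw; otherwise it is built from
// shift/mask/or style operations.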
4067MVT VT =
Op.getSimpleValueType();
4075// Proper support for f16 requires Zvfh. bf16 always requires special 4076// handling. We need to cast the scalar to integer and create an integer 4078if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) || EltVT == MVT::bf16) {
4083if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
4084 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin())) {
4085// Called by LegalizeDAG, we need to use XLenVT operations since we 4086// can't create illegal types. 4087if (
auto *
C = dyn_cast<ConstantFPSDNode>(Elem)) {
4088// Manually constant fold so the integer build_vector can be lowered 4089// better. Waiting for DAGCombine will be too late. 4097// Called by scalar type legalizer, we can use i16. 4114// A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask 4115// vector type, we have a legal equivalently-sized i8 type, so we can use 4122// For a splat, perform a scalar truncate before creating the wider 4152// If we're compiling for an exact VLEN value, we can split our work per 4153// register in the register group. 4163// The following semantically builds up a fixed length concat_vector 4164// of the component build_vectors. We eagerly lower to scalable and 4165// insert_subvector here to avoid DAG combining it back to a large 4171auto OneVRegOfOps =
ArrayRef(BuildVectorOps).
slice(i, ElemsPerVReg);
4175unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts;
4182// If we're about to resort to vslide1down (or stack usage), pack our 4183// elements into the widest scalar type we can. This will force a VL/VTYPE 4184// toggle, but reduces the critical path, the number of vslide1down ops 4185// required, and possibly enables scalar folds of the values. 4189// For m1 vectors, if we have non-undef values in both halves of our vector, 4190// split the vector into low and high halves, build them separately, then 4191// use a vselect to combine them. For long vectors, this cuts the critical 4192// path of the vslide1down sequence in half, and gives us an opportunity 4193// to special case each half independently. Note that we don't change the 4194// length of the sub-vectors here, so if both fallback to the generic 4195// vslide1down path, we should be able to fold the vselect into the final 4196// vslidedown (for the undef tail) for the first half w/ masking. 4198unsigned NumUndefElts =
4200unsigned NumDefElts = NumElts - NumUndefElts;
4201if (NumDefElts >= 8 && NumDefElts > NumElts / 2 &&
4208for (
unsigned i = 0; i < NumElts; i++) {
4210if (i < NumElts / 2) {
4217bool SelectMaskVal = (i < NumElts / 2);
4220assert(SubVecAOps.
size() == NumElts && SubVecBOps.
size() == NumElts &&
4221 MaskVals.
size() == NumElts);
// Cap the cost at a value linear in the number of elements in the vector.
// The default lowering is to use the stack. The vector store + scalar loads
// is linear in VL. However, at high LMULs vslide1down and vslidedown end up
// being (at least) linear in LMUL. As a result, using the vslidedown
// lowering for every element ends up being VL*LMUL.
// TODO: Should we be directly costing the stack alternative? Doing so might
// give us a more accurate upper bound.
// TODO: unify with TTI getSlideCost.
// TODO: Should we be using the build instseq then cost + evaluate scheme
// we use for integer constants here?
unsigned UndefCount = 0;
4263 LinearBudget -= PerSlideCost;
4266 LinearBudget -= PerSlideCost;
4269 LinearBudget -= PerSlideCost;
4272if (LinearBudget < 0)
4277"Illegal type which will result in reserved encoding");
4289// Start our sequence with a TA splat in the hopes that hardware is able to 4290// recognize there's no dependency on the prior value of our temporary 4302 Vec,
Offset, Mask, VL, Policy);
4315 Vec,
Offset, Mask, VL, Policy);
4325if (isa<ConstantSDNode>(
Lo) && isa<ConstantSDNode>(
Hi)) {
4326 int32_t LoC = cast<ConstantSDNode>(
Lo)->getSExtValue();
4327 int32_t HiC = cast<ConstantSDNode>(
Hi)->getSExtValue();
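// Illustrative example (hypothetical constant): splatting the i64 value -5
// (0xFFFFFFFFFFFFFFFB) on RV32 gives
//   int32_t LoC = 0xFFFFFFFB; // -5
//   int32_t HiC = 0xFFFFFFFF; // -1
// and (LoC >> 31) == -1 == HiC, so Hi is just the sign extension of Lo and
// the splat can use the vector/scalar form checked below.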
4328// If Hi constant is all the same sign bit as Lo, lower this as a custom 4329// node in order to try and match RVV vector/scalar instructions. 4330if ((LoC >> 31) == HiC)
4333// If vl is equal to VLMAX or fits in 4 bits and Hi constant is equal to Lo, 4334// we could use vmv.v.x whose EEW = 32 to lower it. This allows us to use 4335// vlmax vsetvli or vsetivli to change the VL. 4336// FIXME: Support larger constants? 4337// FIXME: Support non-constant VLs by saturating? 4341 (isa<RegisterSDNode>(VL) &&
4342 cast<RegisterSDNode>(VL)->
getReg() == RISCV::X0))
4344elseif (isa<ConstantSDNode>(VL) && isUInt<4>(VL->
getAsZExtVal()))
4357// Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended. 4359 isa<ConstantSDNode>(
Hi.getOperand(1)) &&
4360Hi.getConstantOperandVal(1) == 31)
4363// If the hi bits of the splat are undefined, then it's fine to just splat Lo 4364// even if it might be sign extended. 4368// Fall back to a stack store and stride x0 vector load. 4373// Called by type legalization to handle splat of i64 on RV32. 4374// FIXME: We can optimize this when the type has sign or zero bits in one 4379assert(Scalar.getValueType() == MVT::i64 &&
"Unexpected VT!");
4385// This function lowers a splat of a scalar operand Splat with the vector 4386// length VL. It ensures the final sequence is type legal, which is useful when 4387// lowering a splat after type legalization. 4391bool HasPassthru = Passthru && !Passthru.
isUndef();
4392if (!HasPassthru && !Passthru)
4399if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) ||
4400 EltVT == MVT::bf16) {
4401if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
4402 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin()))
4415// Simplest case is that the operand needs to be promoted to XLenVT. 4416if (Scalar.getValueType().bitsLE(XLenVT)) {
// If the operand is a constant, sign extend to increase our chances
// of being able to use a .vi instruction. ANY_EXTEND would become a
// zero extend and the simm5 check in isel would fail.
// FIXME: Should we ignore the upper bits in isel instead?
 Scalar = DAG.
getNode(ExtOpc,
DL, XLenVT, Scalar);
4427assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
4428"Unexpected scalar for splat lowering!");
4434// Otherwise use the more complicated splatting algorithm. 4438// This function lowers an insert of a scalar operand Scalar into lane 4439// 0 of the vector regardless of the value of VL. The contents of the 4440// remaining lanes of the result vector are unspecified. VL is assumed 4452SDValue ExtractedVal = Scalar.getOperand(0);
4453// The element types must be the same. 4457MVT ExtractedContainerVT = ExtractedVT;
4460 DAG, ExtractedContainerVT, Subtarget);
4462 ExtractedVal, DAG, Subtarget);
4464if (ExtractedContainerVT.
bitsLE(VT))
4477// Avoid the tricky legalization cases by falling back to using the 4478// splat code which already handles it gracefully. 4479if (!Scalar.getValueType().bitsLE(XLenVT))
4482 VT,
DL, DAG, Subtarget);
// If the operand is a constant, sign extend to increase our chances
// of being able to use a .vi instruction. ANY_EXTEND would become a
// zero extend and the simm5 check in isel would fail.
// FIXME: Should we ignore the upper bits in isel instead?
 Scalar = DAG.
getNode(ExtOpc,
DL, XLenVT, Scalar);
// Can this shuffle be performed on exactly one (possibly larger) input?
// Both inputs must be extracts.
// Extracting from the same source.
if (Src != V2.getOperand(0))
4513// Src needs to have twice the number of elements. 4515if (Src.getValueType().getVectorNumElements() != (NumElts * 2))
4518// The extracts must extract the two halves of the source. 4520 V2.getConstantOperandVal(1) != NumElts)
4526/// Is this shuffle interleaving contiguous elements from one vector into the 4527/// even elements and contiguous elements from another vector into the odd 4528/// elements. \p EvenSrc will contain the element that should be in the first 4529/// even element. \p OddSrc will contain the element that should be in the first 4530/// odd element. These can be the first element in a source or the element half 4531/// way through the source. 4534// We need to be able to widen elements to the next larger integer type. 4538intSize = Mask.size();
4540assert(
Size == (
int)NumElts &&
"Unexpected mask size");
4546 EvenSrc = StartIndexes[0];
4547 OddSrc = StartIndexes[1];
4549// One source should be low half of first vector. 4550if (EvenSrc != 0 && OddSrc != 0)
// Subvectors will be extracted from either the start of the two input
// vectors, or from the start and middle of the first vector if it's a unary
// interleave.
// In both cases, HalfNumElts will be extracted.
// We need to ensure that the extract indices are 0 or HalfNumElts otherwise
// we'll create an illegal extract_subvector.
// FIXME: We could support other values using a slidedown first.
int HalfNumElts = NumElts / 2;
4561return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);
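// Illustrative example (hypothetical shuffle): with two v8i8 sources, the
// mask <0, 8, 1, 9, 2, 10, 3, 11> interleaves V1[0..3] into the even lanes
// and V2[0..3] into the odd lanes, giving EvenSrc == 0 and OddSrc == 8
// (NumElts); both are multiples of HalfNumElts == 4, so the match succeeds.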
4564/// Match shuffles that concatenate two vectors, rotate the concatenation, 4565/// and then extract the original number of elements from the rotated result. 4566/// This is equivalent to vector.splice or X86's PALIGNR instruction. The 4567/// returned rotation amount is for a rotate right, where elements move from 4568/// higher elements to lower elements. \p LoSrc indicates the first source 4569/// vector of the rotate or -1 for undef. \p HiSrc indicates the second vector 4570/// of the rotate or -1 for undef. At least one of \p LoSrc and \p HiSrc will be 4571/// 0 or 1 if a rotation is found. 4573/// NOTE: We talk about rotate to the right which matches how bit shift and 4574/// rotate instructions are described where LSBs are on the right, but LLVM IR 4575/// and the table below write vectors with the lowest elements on the left. 4577intSize = Mask.size();
4579// We need to detect various ways of spelling a rotation: 4580// [11, 12, 13, 14, 15, 0, 1, 2] 4581// [-1, 12, 13, 14, -1, -1, 1, -1] 4582// [-1, -1, -1, -1, -1, -1, 1, 2] 4583// [ 3, 4, 5, 6, 7, 8, 9, 10] 4584// [-1, 4, 5, 6, -1, -1, 9, -1] 4585// [-1, 4, 5, 6, -1, -1, -1, -1] 4589for (
int i = 0; i !=
Size; ++i) {
4594// Determine where a rotate vector would have started. 4595int StartIdx = i - (M %
Size);
4596// The identity rotation isn't interesting, stop. 4600// If we found the tail of a vector the rotation must be the missing 4601// front. If we found the head of a vector, it must be how much of the 4603int CandidateRotation = StartIdx < 0 ? -StartIdx :
Size - StartIdx;
4606 Rotation = CandidateRotation;
4607elseif (Rotation != CandidateRotation)
4608// The rotations don't match, so we can't match this mask. 4611// Compute which value this mask is pointing at. 4612int MaskSrc = M <
Size ? 0 : 1;
// Compute which of the two target values this index should be assigned to.
// This reflects whether the high elements are remaining or the low elements
int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc;
4619// Either set up this value if we've not encountered it before, or check 4620// that it remains consistent. 4622 TargetSrc = MaskSrc;
4623elseif (TargetSrc != MaskSrc)
4624// This may be a rotation, but it pulls from the inputs in some 4625// unsupported interleaving. 4629// Check that we successfully analyzed the mask, and normalize the results. 4630assert(Rotation != 0 &&
"Failed to locate a viable rotation!");
4631assert((LoSrc >= 0 || HiSrc >= 0) &&
4632"Failed to find a rotated input vector!");
4637// Lower a deinterleave shuffle to SRL and TRUNC. Factor must be 4638// 2, 4, 8 and the integer type Factor-times larger than VT's 4639// element type must be a legal element type. 4640// [a, p, b, q, c, r, d, s] -> [a, b, c, d] (Factor=2, Index=0) 4641// -> [p, q, r, s] (Factor=2, Index=1) 4646ElementCount SrcEC = Src.getValueType().getVectorElementCount();
4653unsigned Shift = Index * EltBits;
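// Illustrative sketch (plain scalar arithmetic): for Factor == 2 on 8-bit
// elements, viewing an adjacent pair as a single 16-bit lane, each result of
// the deinterleave is just a shift by Index * EltBits plus a truncate:
//   uint8_t a = 0x12, p = 0x34;
//   uint16_t Pair = (uint16_t)a | ((uint16_t)p << 8);
//   uint8_t Even = (uint8_t)(Pair >> 0); // Index == 0 -> a
//   uint8_t Odd  = (uint8_t)(Pair >> 8); // Index == 1 -> p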
4663// Lower the following shuffle to vslidedown. 4665// t49: v8i8 = extract_subvector t13, Constant:i64<0> 4666// t109: v8i8 = extract_subvector t13, Constant:i64<8> 4667// t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106 4669// t69: v16i16 = extract_subvector t68, Constant:i64<0> 4670// t23: v8i16 = extract_subvector t69, Constant:i64<0> 4671// t29: v4i16 = extract_subvector t23, Constant:i64<4> 4672// t26: v8i16 = extract_subvector t69, Constant:i64<8> 4673// t30: v4i16 = extract_subvector t26, Constant:i64<0> 4674// t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30 4680auto findNonEXTRACT_SUBVECTORParent =
4681 [](
SDValue Parent) -> std::pair<SDValue, uint64_t> {
4684// EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from 4685// a scalable vector. But we don't want to match the case. 4686 Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) {
4687Offset += Parent.getConstantOperandVal(1);
4688 Parent = Parent.getOperand(0);
4690return std::make_pair(Parent,
Offset);
4693auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);
4694auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);
4696// Extracting from the same source. 4701// Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs. 4703for (
size_t i = 0; i != NewMask.
size(); ++i) {
4704if (NewMask[i] == -1)
4707if (
static_cast<size_t>(NewMask[i]) < NewMask.
size()) {
4708 NewMask[i] = NewMask[i] + V1IndexOffset;
4710// Minus NewMask.size() is needed. Otherwise, the b case would be 4711// <5,6,7,12> instead of <5,6,7,8>. 4712 NewMask[i] = NewMask[i] - NewMask.
size() + V2IndexOffset;
4716// First index must be known and non-zero. It will be used as the slidedown 4721// NewMask is also continuous. 4722for (
unsigned i = 1; i != NewMask.
size(); ++i)
4723if (NewMask[i - 1] + 1 != NewMask[i])
4727MVT SrcVT = Src.getSimpleValueType();
// Because vslideup leaves the destination elements at the start intact, we can
// use it to perform shuffles that insert subvectors:
//
// vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11>
// ->
// vsetvli zero, 8, e8, mf2, ta, ma
// vslideup.vi v8, v9, 4
//
// vector_shuffle v8:v8i8, v9:v8i8 <0, 1, 8, 9, 10, 5, 6, 7>
// ->
// vsetvli zero, 5, e8, mf2, tu, ma
// vslideup.vi v8, v9, 2
int NumSubElts, Index;
4763bool OpsSwapped = Mask[Index] < (int)NumElts;
4764SDValue InPlace = OpsSwapped ? V2 : V1;
4765SDValue ToInsert = OpsSwapped ? V1 : V2;
// We slide up by the index that the subvector is being inserted at, and set
// VL to the index + the number of elements being inserted.
// If we're adding a suffix to the in-place vector, i.e. inserting right
// up to the very end of it, then we don't actually care about the tail.
if (NumSubElts + Index >= (
int)NumElts)
4783// If we're inserting into the lowest elements, use a tail undisturbed 4789 Res =
getVSlideup(DAG, Subtarget,
DL, ContainerVT, InPlace, ToInsert,
4794/// Match v(f)slide1up/down idioms. These operations involve sliding 4795/// N-1 elements to make room for an inserted scalar at one end. 4801bool OpsSwapped =
false;
4802if (!isa<BuildVectorSDNode>(V1)) {
4803if (!isa<BuildVectorSDNode>(V2))
4808SDValueSplat = cast<BuildVectorSDNode>(V1)->getSplatValue();
4812// Return true if the mask could describe a slide of Mask.size() - 1 4813// elements from concat_vector(V1, V2)[Base:] to [Offset:]. 4816constunsigned E = Mask.size() - ((
Offset > 0) ?
Offset : 0);
4817for (
unsigned i = S; i != E; ++i)
4818if (Mask[i] >= 0 && (
unsigned)Mask[i] !=
Base + i +
Offset)
4824bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1);
4825if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1))
4828constint InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0];
4829// Inserted lane must come from splat, undef scalar is legal but not profitable. 4830if (InsertIdx < 0 || InsertIdx / NumElts != (
unsigned)OpsSwapped)
4836// zvfhmin and zvfbfmin don't have vfslide1{down,up}.vf so use fmv.x.h + 4837// vslide1{down,up}.vx instead. 4853auto OpCode = IsVSlidedown ?
4858auto Vec = DAG.
getNode(OpCode,
DL, ContainerVT,
4861Splat, TrueMask, VL);
4865// Match a mask which "spreads" the leading elements of a vector evenly 4866// across the result. Factor is the spread amount, and Index is the 4867// offset applied. (on success, Index < Factor) This is the inverse 4868// of a deinterleave with the same Factor and Index. This is analogous 4869// to an interleave, except that all but one lane is undef. 4872for (
unsigned i = 0; i < Mask.size(); i++)
4873 LaneIsUndef[i % Factor] &= (Mask[i] == -1);
4876for (
unsigned i = 0; i < Factor; i++) {
4887for (
unsigned i = 0; i < Mask.size() / Factor; i++) {
4888unsigned j = i * Factor + Index;
4889if (Mask[j] != -1 && (
unsigned)Mask[j] != i)
4895// Given a vector a, b, c, d return a vector Factor times longer 4896// with Factor-1 undef's between elements. Ex: 4897// a, undef, b, undef, c, undef, d, undef (Factor=2, Index=0) 4898// undef, a, undef, b, undef, c, undef, d (Factor=2, Index=1) 4902MVT VT = V.getSimpleValueType();
4910// TODO: On rv32, the constant becomes a splat_vector_parts which does not 4911// allow the SHL to fold away if Index is 0. 4915// Make sure to use original element type 4917 EC.multiplyCoefficientBy(Factor));
4921// Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx 4922// to create an interleaved vector of <[vscale x] n*2 x ty>. 4923// This requires that the size of ty is less than the subtarget's maximum ELEN. 4928// FIXME: Not only does this optimize the code, it fixes some correctness 4929// issues because MIR does not have freeze. 4936MVT VecContainerVT = VecVT;
// <vscale x n x ty> 4937// Convert fixed vectors to scalable if needed 4946// We're working with a vector of the same size as the resulting 4947// interleaved vector, but with half the number of elements and 4948// twice the SEW (Hence the restriction on not using the maximum 4953MVT WideContainerVT = WideVT;
// <vscale x n x ty*2> 4957// Bitcast the input vectors to integers in case they are FP 4959 EvenV = DAG.
getBitcast(VecContainerVT, EvenV);
4966if (Subtarget.hasStdExtZvbb()) {
4967// Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV. 4971 OffsetVec, Passthru, Mask, VL);
4973 Interleaved, EvenV, Passthru, Mask, VL);
4975// FIXME: We should freeze the odd vector here. We already handled the case 4976// of provably undef/poison above. 4978// Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with 4981 OddV, Passthru, Mask, VL);
// Then multiply OddV by (2^VecVT.getScalarSizeInBits()) - 1, i.e. the
// all-ones value 0xff...ff of the element type.
 OddV, AllOnesVec, Passthru, Mask, VL);
// Add the two together so we get
//   (OddV * 0xff...ff) + (OddV + EvenV)
// = (OddV * 0x100...00) + EvenV
// = (OddV << VecVT.getScalarSizeInBits()) + EvenV
// Note the ADD_VL and VWMULU_VL should get selected as vwmaccu.vx.
 Interleaved, OddsMul, Passthru, Mask, VL);
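// Illustrative check of the identity above with plain integers (SEW == 8,
// hypothetical values):
//   uint8_t Even = 0x12, Odd = 0x34;
//   uint16_t ViaMacc  = (uint16_t)Odd * 0xFF + ((uint16_t)Odd + Even);
//   uint16_t Expected = ((uint16_t)Odd << 8) + Even; // 0x3412
//   assert(ViaMacc == Expected);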
4998// Bitcast from <vscale x n * ty*2> to <vscale x 2*n x ty> 5002 Interleaved = DAG.
getBitcast(ResultContainerVT, Interleaved);
5004// Convert back to a fixed vector if needed 5015// If we have a vector of bits that we want to reverse, we can use a vbrev on a 5016// larger element type, e.g. v32i1 can be reversed with a v1i32 bitreverse. 5038// If we don't have zvbb or the larger element type > ELEN, the operation will 5045// If the bit vector doesn't fit exactly into the larger element type, we need 5046// to insert it into the larger vector and then shift up the reversed bits 5047// afterwards to get rid of the gap introduced. 5048if (ViaEltSize > NumElts)
5055// Shift up the reversed bits if the vector didn't exactly fit into the larger 5057if (ViaEltSize > NumElts)
5063if (ViaEltSize > NumElts)
5072MVT &RotateVT,
unsigned &RotateAmt) {
5080 NumElts, NumSubElts, RotateAmt))
5083 NumElts / NumSubElts);
5085// We might have a RotateVT that isn't legal, e.g. v4i64 on zve32x. 5089// Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can 5090// reinterpret it as a v2i32 and rotate it right by 8 instead. We can lower this 5091// as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor. 5106// A rotate of an i16 by 8 bits either direction is equivalent to a byteswap, 5107// so canonicalize to vrev8. 5117// If compiling with an exactly known VLEN, see if we can split a 5118// shuffle on m2 or larger into a small number of m1 sized shuffles 5119// which write each destination registers exactly once. 5129// If we don't know exact data layout, not much we can do. If this 5130// is already m1 or smaller, no point in splitting further. 5135// Avoid picking up bitrotate patterns which we have a linear-in-lmul 5151unsigned NumOfSrcRegs = NumElts / NumOpElts;
5152unsigned NumOfDestRegs = NumElts / NumOpElts;
5153// The following semantically builds up a fixed length concat_vector 5154// of the component shuffle_vectors. We eagerly lower to scalable here 5155// to avoid DAG combining it back to a large shuffle_vector again. 5161 Mask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs,
5163 [&](
ArrayRef<int> SrcSubMask,
unsigned SrcVecIdx,
unsigned DstVecIdx) {
5164Operands.emplace_back().emplace_back(
5165 SrcVecIdx, UINT_MAX,
5168 [&](
ArrayRef<int> SrcSubMask,
unsigned Idx1,
unsigned Idx2,
bool NewReg) {
5174assert(
Operands.size() == NumOfDestRegs &&
"Whole vector must be processed");
// Note: check that we do not emit too many shuffles here to prevent code
// bloat.
// TODO: investigate if it can be improved by extra analysis of the masks
// to check if the code is more profitable.
unsigned NumShuffles = std::accumulate(
5186 for (const auto &P : Data) {
5187 unsigned Idx2 = std::get<1>(P);
5188 ArrayRef<int> Mask = std::get<2>(P);
5189 if (Idx2 != UINT_MAX)
5191 else if (ShuffleVectorInst::isIdentityMask(Mask, Mask.size()))
5196if ((NumOfDestRegs > 2 && NumShuffles > NumOfDestRegs) ||
5197 (NumOfDestRegs <= 2 && NumShuffles >= 4))
5199auto ExtractValue = [&, &DAG = DAG](
SDValue SrcVec,
unsigned ExtractIdx) {
5201 DAG.getVectorIdxConstant(ExtractIdx,
DL));
5205auto PerformShuffle = [&, &DAG = DAG](
SDValue SubVec1,
SDValue SubVec2,
5207SDValue SubVec = DAG.getVectorShuffle(OneRegVT,
DL, SubVec1, SubVec2, Mask);
5210SDValue Vec = DAG.getUNDEF(ContainerVT);
5215for (
unsignedI : seq<unsigned>(
Data.size())) {
5216constauto &[Idx1, Idx2,
_] =
Data[
I];
5219"Expected both indices to be extracted already.");
5222SDValueV = ExtractValue(Idx1 >= NumOfSrcRegs ? V2 : V1,
5223 (Idx1 % NumOfSrcRegs) * NumOpElts);
5225if (Idx2 != UINT_MAX)
5226 Values[Idx2] = ExtractValue(Idx2 >= NumOfSrcRegs ? V2 : V1,
5227 (Idx2 % NumOfSrcRegs) * NumOpElts);
5230for (
constauto &[Idx1, Idx2, Mask] :
Data) {
5232SDValueV2 = Idx2 == UINT_MAX ? V1 : Values.
at(Idx2);
5233V = PerformShuffle(V1, V2, Mask);
5237unsigned InsertIdx =
I * NumOpElts;
5240 DAG.getVectorIdxConstant(InsertIdx,
DL));
5245// Matches a subset of compress masks with a contiguous prefix of output 5246// elements. This could be extended to allow gaps by deciding which 5247// source elements to spuriously demand. 5250bool SawUndef =
false;
5251for (
unsigned i = 0; i < Mask.size(); i++) {
5258if (i > (
unsigned)Mask[i])
5267/// Given a shuffle where the indices are disjoint between the two sources, 5270/// t2:v4i8 = vector_shuffle t0:v4i8, t1:v4i8, <2, 7, 1, 4> 5272/// Merge the two sources into one and do a single source shuffle: 5274/// t2:v4i8 = vselect t1:v4i8, t0:v4i8, <0, 1, 0, 1> 5275/// t3:v4i8 = vector_shuffle t2:v4i8, undef, <2, 3, 1, 0> 5277/// A vselect will either be merged into a masked instruction or be lowered as a 5278/// vmerge.vvm, which is cheaper than a vrgather.vv. 5288// Work out which source each lane will come from. 5291for (
intIdx : Mask) {
5294unsigned SrcIdx =
Idx % Mask.size();
5296if (Srcs[SrcIdx] == -1)
5297// Mark this source as using this lane. 5299elseif (Srcs[SrcIdx] != Src)
5300// The other source is using this lane: not disjoint. 5305for (
int Lane : Srcs) {
5316// Move all indices relative to the first source. 5318for (
unsignedI = 0;
I < Mask.size();
I++) {
5322 NewMask[
I] = Mask[
I] % Mask.size();
5328/// Try to widen element type to get a new mask value for a better permutation 5329/// sequence. This doesn't try to inspect the widened mask for profitability; 5330/// we speculate the widened form is equal or better. This has the effect of 5331/// reducing mask constant sizes - allowing cheaper materialization sequences 5332/// - and index sequence sizes - reducing register pressure and materialization 5333/// cost, at the cost of (possibly) an extra VTYPE toggle. 5336MVT VT =
Op.getSimpleValueType();
5343// Avoid wasted work leading to isTypeLegal check failing below 5344if (ElementSize > 32)
5367MVT VT =
Op.getSimpleValueType();
5372// Lower to a vror.vi of a larger element type if possible before we promote 5379// Promote i1 shuffle to i8 shuffle. 5382 V2 = V2.isUndef() ? DAG.
getUNDEF(WidenVT)
5398// Turn splatted vector load into a strided load with an X0 stride. 5400// Peek through CONCAT_VECTORS as VectorCombine can concat a vector 5402// FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts? 5406 V.getOperand(0).getSimpleValueType().getVectorNumElements();
5407 V = V.getOperand(
Offset / OpElements);
5411// We need to ensure the load isn't atomic or volatile. 5413auto *Ld = cast<LoadSDNode>(V);
5418// If this is SEW=64 on RV32, use a strided load with a stride of x0. 5423SDValue Ops[] = {Ld->getChain(),
5437MVT SplatVT = ContainerVT;
5439// f16 with zvfhmin and bf16 need to use an integer scalar load. 5440if (SVT == MVT::bf16 ||
5441 (SVT == MVT::f16 && !Subtarget.hasStdExtZfh())) {
5446// Otherwise use a scalar load and splat. This will give the best 5447// opportunity to fold a splat into the operation. ISel can turn it into 5448// the x0 strided load if we aren't able to fold away the select. 5450 V = DAG.
getLoad(SVT,
DL, Ld->getChain(), NewAddr,
5451 Ld->getPointerInfo().getWithOffset(
Offset),
5452 Ld->getOriginalAlign(),
5456 Ld->getPointerInfo().getWithOffset(
Offset), SVT,
5457 Ld->getOriginalAlign(),
5458 Ld->getMemOperand()->getFlags());
5470assert(Lane < (
int)NumElts &&
"Unexpected lane!");
5473 DAG.
getUNDEF(ContainerVT), TrueMask, VL);
5478// For exact VLEN m2 or greater, try to split to m1 operations if we 5479// can split cleanly. 5493// A bitrotate will be one instruction on Zvkb, so try to lower to it first if 5495if (Subtarget.hasStdExtZvkb())
5499// Lower rotations to a SLIDEDOWN and a SLIDEUP. One of the source vectors may 5500// be undef which can be handled with a single SLIDEDOWN/UP. 5506 LoV = LoSrc == 0 ? V1 : V2;
5510 HiV = HiSrc == 0 ? V1 : V2;
5514// We found a rotation. We need to slide HiV down by Rotation. Then we need 5515// to slide LoV up by (NumElts - Rotation). 5516unsigned InvRotate = NumElts - Rotation;
5520// Even though we could use a smaller VL, don't to avoid a vsetivli 5536// If this is a deinterleave(2,4,8) and we can widen the vector, then we can 5537// use shift and truncate to perform the shuffle. 5538// TODO: For Factor=6, we can perform the first step of the deinterleave via 5539// shift-and-trunc reducing total cost for everything except an mf8 result. 5540// TODO: For Factor=4,8, we can do the same when the ratio isn't high enough 5541// to do the entire operation. 5544assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8);
5545for (
unsigned Factor = 2; Factor <= MaxFactor; Factor <<= 1) {
5559// Detect an interleave shuffle and lower to 5560// (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1)) 5563// Extract the halves of the vectors. 5566// Recognize if one half is actually undef; the matching above will 5567// otherwise reuse the even stream for the undef one. This improves 5568// spread(2) shuffles. 5569bool LaneIsUndef[2] = {
true,
true};
5570for (
unsigned i = 0; i < Mask.size(); i++)
5571 LaneIsUndef[i % 2] &= (Mask[i] == -1);
5573intSize = Mask.size();
5575if (LaneIsUndef[0]) {
5578assert(EvenSrc >= 0 &&
"Undef source?");
5579 EvenV = (EvenSrc /
Size) == 0 ? V1 : V2;
5584if (LaneIsUndef[1]) {
5587assert(OddSrc >= 0 &&
"Undef source?");
5588 OddV = (OddSrc /
Size) == 0 ? V1 : V2;
5597// Handle any remaining single source shuffles 5598assert(!V1.
isUndef() &&
"Unexpected shuffle canonicalization");
5600// We might be able to express the shuffle as a bitrotate. But even if we 5601// don't have Zvkb and have to expand, the expanded sequence of approx. 2 5602// shifts and a vor will have a higher throughput than a vrgather. 5606// Before hitting generic lowering fallbacks, try to widen the mask 5611// Can we generate a vcompress instead of a vrgather? These scale better 5612// at high LMUL, at the cost of not being able to fold a following select 5613// into them. The mask constants are also smaller than the index vector 5614// constants, and thus easier to materialize. 5618for (
autoIdx : Mask) {
5630// Match a spread(4,8) which can be done via extend and shift. Spread(2) 5631// is fully covered in interleave(2) above, so it is ignored here. 5634assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8);
5635for (
unsigned Factor = 4; Factor <= MaxFactor; Factor <<= 1) {
5648any_of(Mask, [&](
constauto &
Idx) {
returnIdx > 255; })) {
5649// On such a vector we're unable to use i8 as the index type. 5650// FIXME: We could promote the index to i16 and use vrgatherei16, but that 5651// may involve vector splitting if we're already at LMUL=8, or our 5652// user-supplied maximum fixed-length LMUL. 5656// Base case for the two operand recursion below - handle the worst case 5657// single source shuffle. 5660// Since we can't introduce illegal index types at this stage, use i16 and 5661// vrgatherei16 if the corresponding index type for plain vrgather is greater 5668// If the mask allows, we can do all the index computation in 16 bits. This 5669// requires less work and less register pressure at high LMUL, and creates 5670// smaller constants which may be cheaper to materialize. 5677MVT IndexContainerVT =
5682for (
int MaskIndex : Mask) {
5683bool IsLHSIndex = MaskIndex < (int)NumElts && MaskIndex >= 0;
5692 DAG.
getUNDEF(ContainerVT), TrueMask, VL);
5696// As a backup, shuffles can be lowered via a vrgather instruction, possibly 5697// merged with a second vrgather. 5700// Now construct the mask that will be used by the blended vrgather operation. 5701// Construct the appropriate indices into each vector. 5702for (
int MaskIndex : Mask) {
5703bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
5704 ShuffleMaskLHS.
push_back(IsLHSOrUndefIndex && MaskIndex >= 0
5706 ShuffleMaskRHS.
push_back(IsLHSOrUndefIndex ? -1 : (MaskIndex - NumElts));
5709// If the mask indices are disjoint between the two sources, we can lower it 5710// as a vselect + a single source vrgather.vv. Don't do this if we think the 5711// operands may end up being lowered to something cheaper than a vrgather.vv. 5720// Before hitting generic lowering fallbacks, try to widen the mask 5725// Try to pick a profitable operand order. 5729// Recursively invoke lowering for each operand if we had two 5730// independent single source shuffles, and then combine the result via a 5731// vselect. Note that the vselect will likely be folded back into the 5732// second permute (vrgather, or other) by the post-isel combine. 5737for (
int MaskIndex : Mask) {
5738bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ !SwapOps;
5742assert(MaskVals.
size() == NumElts &&
"Unexpected select-like shuffle");
5752// Only support legal VTs for other shuffles for now. 5756// Support splats for any type. These should type legalize well. 5762// Not for i1 vectors. 5771// Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting 5774RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(
SDValueOp,
5776MVT VT =
Op.getSimpleValueType();
5780MVT ContainerVT = VT;
5783if (
Op->isVPOpcode()) {
5784 Mask =
Op.getOperand(1);
5788 VL =
Op.getOperand(2);
5791// We choose FP type that can represent the value if possible. Otherwise, we 5792// use rounding to zero conversion for correct exponent of the result. 5793// TODO: Use f16 for i8 when possible? 5794MVT FloatEltVT = (EltSize >= 32) ? MVT::f64 : MVT::f32;
5796 FloatEltVT = MVT::f32;
5799// Legal types should have been checked in the RISCVTargetLowering 5801// TODO: Splitting may make sense in some cases. 5803"Expected legal float type!");
5805// For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X. 5806// The trailing zero count is equal to log2 of this single bit value. 5810 }
elseif (
Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) {
5813 Src = DAG.
getNode(ISD::VP_AND,
DL, VT, Src, Neg, Mask, VL);
5816// We have a legal FP type, convert to it. 5819if (
Op->isVPOpcode())
5820 FloatVal = DAG.
getNode(ISD::VP_UINT_TO_FP,
DL, FloatVT, Src, Mask, VL);
5824// Use RTZ to avoid rounding influencing exponent of FloatVal. 5829if (!
Op->isVPOpcode())
5833MVT ContainerFloatVT =
5836 Src, Mask, RTZRM, VL);
5840// Bitcast to integer and shift the exponent to the LSB. 5843unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
5846// Restore back to original type. Truncation after SRL is to generate vnsrl. 5847if (
Op->isVPOpcode()) {
5860// The exponent contains log2 of the value in biased form. 5861unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
5862// For trailing zeros, we just need to subtract the bias. 5866if (
Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF)
5867return DAG.
getNode(ISD::VP_SUB,
DL, VT, Exp,
5870// For leading zeros, we need to remove the bias and convert from log2 to 5871// leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)). 5872unsigned Adjust = ExponentBias + (EltSize - 1);
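// Illustrative sketch of the exponent trick in scalar form (32-bit element,
// f32; assumes <cstring> for memcpy):
//   uint32_t X = 80;                            // floor(log2(80)) == 6
//   float F = (float)X;                         // exponent field holds 6 + 127
//   uint32_t Bits; memcpy(&Bits, &F, sizeof(Bits));
//   unsigned Ctlz = (127 + 31) - (Bits >> 23);  // 158 - 133 == 25 == clz(80)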
5874if (
Op->isVPOpcode())
5880// The above result with zero input equals to Adjust which is greater than 5881// EltSize. Hence, we can do min(Res, EltSize) for CTLZ. 5884elseif (
Op.getOpcode() == ISD::VP_CTLZ)
5885 Res = DAG.
getNode(ISD::VP_UMIN,
DL, VT, Res,
5904 SrcVT = ContainerVT;
5907// Convert to boolean vector. 5917if (
Op->getOpcode() == ISD::VP_CTTZ_ELTS_ZERO_UNDEF)
5918// In this case, we can interpret poison as -1, so nothing to do further. 5928// While RVV has alignment restrictions, we should always be able to load as a 5929// legal equivalently-sized byte-typed vector instead. This method is 5930// responsible for re-expressing a ISD::LOAD via a correctly-aligned type. If 5931// the load is already correctly-aligned, it returns SDValue(). 5934auto *
Load = cast<LoadSDNode>(
Op);
5935assert(Load &&
Load->getMemoryVT().isVector() &&
"Expected vector load");
5939 *
Load->getMemOperand()))
5943MVT VT =
Op.getSimpleValueType();
5945assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
5946"Unexpected unaligned RVV load type");
5950"Expecting equally-sized RVV vector types to be legal");
5952Load->getPointerInfo(),
Load->getOriginalAlign(),
5953Load->getMemOperand()->getFlags());
5957// While RVV has alignment restrictions, we should always be able to store as a 5958// legal equivalently-sized byte-typed vector instead. This method is 5959// responsible for re-expressing a ISD::STORE via a correctly-aligned type. It 5960// returns SDValue() if the store is already correctly aligned. 5963auto *
Store = cast<StoreSDNode>(
Op);
5964assert(Store &&
Store->getValue().getValueType().isVector() &&
5965"Expected vector store");
5968Store->getMemoryVT(),
5969 *
Store->getMemOperand()))
5976assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
5977"Unexpected unaligned RVV store type");
5981"Expecting equally-sized RVV vector types to be legal");
5982 StoredVal = DAG.
getBitcast(NewVT, StoredVal);
5984Store->getPointerInfo(),
Store->getOriginalAlign(),
5985Store->getMemOperand()->getFlags());
5990assert(
Op.getValueType() == MVT::i64 &&
"Unexpected VT");
5992 int64_t Imm = cast<ConstantSDNode>(
Op)->getSExtValue();
5994// All simm32 constants should be handled by isel. 5995// NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making 5996// this check redundant, but small immediates are common so this check 5997// should have better compile time. 6001// We only need to cost the immediate, if constant pool lowering is enabled. 6009// Optimizations below are disabled for opt size. If we're optimizing for 6010// size, use a constant pool. 6014// Special case. See if we can build the constant as (ADD (SLLI X, C), X) do 6015// that if it will avoid a constant pool. 6016// It will require an extra temporary register though. 6017// If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where 6018// low and high 32 bits are the same and bit 31 and 63 are set. 6019unsigned ShiftAmt, AddOpc;
6030MVT VT =
Op.getSimpleValueType();
6031constAPFloat &
Imm = cast<ConstantFPSDNode>(
Op)->getValueAPF();
6033// Can this constant be selected by a Zfa FLI instruction? 6037// If the constant is negative, try negating. 6038if (Index < 0 &&
Imm.isNegative()) {
6043// If we couldn't find a FLI lowering, fall back to generic code. 6047// Emit an FLI+FNEG. We use a custom node to hide from constant folding. 6066if (Subtarget.hasStdExtZtso()) {
6067// The only fence that needs an instruction is a sequentially-consistent 6068// cross-thread fence. 6073// MEMBARRIER is a compiler barrier; it codegens to a no-op. 6077// singlethread fences only synchronize with signal handlers on the same 6078// thread and thus only need to preserve instruction order, not actually 6079// enforce memory ordering. 6081// MEMBARRIER is a compiler barrier; it codegens to a no-op. 6090MVT VT =
Op.getSimpleValueType();
6092unsignedCheck =
Op.getConstantOperandVal(1);
6093unsigned TDCMask = 0;
6121MVT VT0 =
Op.getOperand(0).getSimpleValueType();
6126if (
Op.getOpcode() == ISD::VP_IS_FPCLASS) {
6128 VL =
Op.getOperand(3);
6146if (
Op.getOpcode() == ISD::VP_IS_FPCLASS) {
6148MVT MaskContainerVT =
6151 VL =
Op.getOperand(3);
6156 Mask, VL,
Op->getFlags());
6159 DAG.
getUNDEF(ContainerDstVT), TDCMaskV, VL);
6164 DAG.
getUNDEF(ContainerVT), Mask, VL});
6168 TDCMaskV, DAG.
getUNDEF(ContainerDstVT), Mask, VL);
6172 DAG.
getUNDEF(ContainerDstVT), SplatZero, VL);
6176 DAG.
getUNDEF(ContainerVT), Mask, VL});
6187// Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these 6188// operations propagate nans. 6192MVT VT =
Op.getSimpleValueType();
6200// If X is a nan, replace Y with X. If Y is a nan, replace X with Y. This 6201// ensures that when one input is a nan, the other will also be a nan 6202// allowing the nan to propagate. If both inputs are nan, this will swap the 6203// inputs which is harmless. 6219return DAG.
getNode(Opc,
DL, VT, NewX, NewY);
6222// Check no NaNs before converting to fixed vector scalable. 6226MVT ContainerVT = VT;
6234if (
Op->isVPOpcode()) {
6235 Mask =
Op.getOperand(2);
6239 VL =
Op.getOperand(3);
6247 {X, X, DAG.getCondCode(ISD::SETOEQ),
6248 DAG.getUNDEF(ContainerVT), Mask, VL});
6256 {Y, Y, DAG.getCondCode(ISD::SETOEQ),
6257 DAG.getUNDEF(ContainerVT), Mask, VL});
6267 DAG.
getUNDEF(ContainerVT), Mask, VL);
6277"Wrong opcode for lowering FABS or FNEG.");
6280MVT VT =
Op.getSimpleValueType();
6281assert((VT == MVT::f16 || VT == MVT::bf16) &&
"Unexpected type");
6288 Mask = Mask.sext(Subtarget.
getXLen());
6301MVT VT =
Op.getSimpleValueType();
6302assert((VT == MVT::f16 || VT == MVT::bf16) &&
"Unexpected type");
6309// Get sign bit into an integer value. 6312if (SignSize == Subtarget.
getXLen()) {
6314 }
elseif (SignSize == 16) {
6316 }
elseif (SignSize == 32) {
6318 }
elseif (SignSize == 64) {
6319assert(XLenVT == MVT::i32 &&
"Unexpected type");
6320// Copy the upper word to integer. 6327// Get the signbit at the right position for MagAsInt. 6329if (ShiftAmount > 0) {
6332 }
elseif (ShiftAmount < 0) {
6337// Mask the sign bit and any bits above it. The extra bits will be dropped 6338// when we convert back to FP. 6343// Transform Mag value to integer, and clear the sign bit. 6356/// Get a RISC-V target specified VL op for a given SDNode. 6358#define OP_CASE(NODE) \ 6360 return RISCVISD::NODE##_VL; 6361#define VP_CASE(NODE) \ 6362 case ISD::VP_##NODE: \ 6363 return RISCVISD::NODE##_VL; 6365switch (
Op.getOpcode()) {
6429VP_CASE(FCOPYSIGN)
// VP_FCOPYSIGN 6431VP_CASE(SINT_TO_FP)
// VP_SINT_TO_FP 6432VP_CASE(UINT_TO_FP)
// VP_UINT_TO_FP 6433VP_CASE(BITREVERSE)
// VP_BITREVERSE 6443case ISD::VP_CTLZ_ZERO_UNDEF:
6446case ISD::VP_CTTZ_ZERO_UNDEF:
6455if (
Op.getSimpleValueType().getVectorElementType() == MVT::i1)
6460if (
Op.getSimpleValueType().getVectorElementType() == MVT::i1)
6465if (
Op.getSimpleValueType().getVectorElementType() == MVT::i1)
6477case ISD::VP_SIGN_EXTEND:
6479case ISD::VP_ZERO_EXTEND:
6481case ISD::VP_FP_TO_SINT:
6483case ISD::VP_FP_TO_UINT:
6486case ISD::VP_FMINNUM:
6489case ISD::VP_FMAXNUM:
6502/// Return true if a RISC-V target specified op has a passthru operand. 6506"not a RISC-V target specific op");
6510"adding target specific op should update this function");
6526/// Return true if a RISC-V target specified op has a mask operand. 6530"not a RISC-V target specific op");
6534"adding target specific op should update this function");
6547if (
Op.getValueType() == MVT::nxv32f16 &&
6551if (
Op.getValueType() == MVT::nxv32bf16)
6564if (!
Op.getOperand(j).getValueType().isVector()) {
6565 LoOperands[j] =
Op.getOperand(j);
6566 HiOperands[j] =
Op.getOperand(j);
6569 std::tie(LoOperands[j], HiOperands[j]) =
6574 DAG.
getNode(
Op.getOpcode(),
DL, LoVT, LoOperands,
Op->getFlags());
6576 DAG.
getNode(
Op.getOpcode(),
DL, HiVT, HiOperands,
Op->getFlags());
6591 std::tie(LoOperands[j], HiOperands[j]) =
6595if (!
Op.getOperand(j).getValueType().isVector()) {
6596 LoOperands[j] =
Op.getOperand(j);
6597 HiOperands[j] =
Op.getOperand(j);
6600 std::tie(LoOperands[j], HiOperands[j]) =
6605 DAG.
getNode(
Op.getOpcode(),
DL, LoVT, LoOperands,
Op->getFlags());
6607 DAG.
getNode(
Op.getOpcode(),
DL, HiVT, HiOperands,
Op->getFlags());
6617auto [EVLLo, EVLHi] =
6618 DAG.
SplitEVL(
Op.getOperand(3),
Op.getOperand(1).getValueType(),
DL);
6622 {Op.getOperand(0), Lo, MaskLo, EVLLo},
Op->getFlags());
6624 {ResLo, Hi, MaskHi, EVLHi},
Op->getFlags());
6642if (!
Op.getOperand(j).getValueType().isVector()) {
6643 LoOperands[j] =
Op.getOperand(j);
6644 HiOperands[j] =
Op.getOperand(j);
6647 std::tie(LoOperands[j], HiOperands[j]) =
6652 DAG.
getNode(
Op.getOpcode(),
DL, LoVTs, LoOperands,
Op->getFlags());
6655 DAG.
getNode(
Op.getOpcode(),
DL, HiVTs, HiOperands,
Op->getFlags());
6664switch (
Op.getOpcode()) {
6670return lowerGlobalAddress(
Op, DAG);
6672return lowerBlockAddress(
Op, DAG);
6674return lowerConstantPool(
Op, DAG);
6676return lowerJumpTable(
Op, DAG);
6678return lowerGlobalTLSAddress(
Op, DAG);
6682return lowerConstantFP(
Op, DAG);
6684return lowerSELECT(
Op, DAG);
6686return lowerBRCOND(
Op, DAG);
6688return lowerVASTART(
Op, DAG);
6690return lowerFRAMEADDR(
Op, DAG);
6692return lowerRETURNADDR(
Op, DAG);
6694return lowerShiftLeftParts(
Op, DAG);
6696return lowerShiftRightParts(
Op, DAG,
true);
6698return lowerShiftRightParts(
Op, DAG,
false);
6701if (
Op.getValueType().isFixedLengthVector()) {
6702assert(Subtarget.hasStdExtZvkb());
6703return lowerToScalableOp(
Op, DAG);
6705assert(Subtarget.hasVendorXTHeadBb() &&
6706 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
6707"Unexpected custom legalization");
6708// XTHeadBb only supports rotate by constant. 6709if (!isa<ConstantSDNode>(
Op.getOperand(1)))
6714EVT VT =
Op.getValueType();
6718if (Op0VT == MVT::i16 &&
6720 (VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()))) {
6724if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.
is64Bit() &&
6729if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.
is64Bit() &&
6736// Consider other scalar<->scalar casts as legal if the types are legal. 6737// Otherwise expand them. 6748// We can handle fixed length vector bitcasts with a simple replacement 6752// When bitcasting from scalar to fixed-length vector, insert the scalar 6753// into a one-element vector of the result type, and perform a vector 6765// Custom-legalize bitcasts from fixed-length vector types to scalar types 6766// thus: bitcast the vector to a one-element vector type whose element type 6767// is the same as the result type, and extract the first element. 6779return LowerINTRINSIC_WO_CHAIN(
Op, DAG);
6781return LowerINTRINSIC_W_CHAIN(
Op, DAG);
6783return LowerINTRINSIC_VOID(
Op, DAG);
6785return LowerIS_FPCLASS(
Op, DAG);
6787MVT VT =
Op.getSimpleValueType();
6789assert(Subtarget.hasStdExtZvbb());
6790return lowerToScalableOp(
Op, DAG);
6793assert(Subtarget.hasStdExtZbkb() &&
"Unexpected custom legalization");
6795// Expand bitreverse to a bswap(rev8) followed by brev8. 6802// Only custom-lower vector truncates 6803if (!
Op.getSimpleValueType().isVector())
6805return lowerVectorTruncLike(
Op, DAG);
6808if (
Op.getOperand(0).getValueType().isVector() &&
6809Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6810return lowerVectorMaskExt(
Op, DAG,
/*ExtVal*/ 1);
6813if (
Op.getOperand(0).getValueType().isVector() &&
6814Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6815return lowerVectorMaskExt(
Op, DAG,
/*ExtVal*/ -1);
6818return lowerSPLAT_VECTOR_PARTS(
Op, DAG);
6820return lowerINSERT_VECTOR_ELT(
Op, DAG);
6822return lowerEXTRACT_VECTOR_ELT(
Op, DAG);
6824MVT VT =
Op.getSimpleValueType();
6832MVT ContainerVT = VT;
6840 DAG.
getUNDEF(ContainerVT), Scalar, VL);
6844 DAG.
getUNDEF(ContainerVT), Scalar, VL);
6852MVT VT =
Op.getSimpleValueType();
6855// We define our scalable vector types for lmul=1 to use a 64 bit known 6856// minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate 6857// vscale as VLENB / 8. 6861// We assume VLENB is a multiple of 8. We manually choose the best shift 6862// here because SimplifyDemandedBits isn't always able to simplify it. 6872 }
elseif ((Val % 8) == 0) {
6873// If the multiplier is a multiple of 8, scale it down to avoid needing 6874// to shift the VLENB value. 6886// Custom promote f16 powi with illegal i32 integer type on RV64. Once 6887// promoted this will be legalized into a libcall by LegalizeIntegerTypes. 6888if (
Op.getValueType() == MVT::f16 && Subtarget.
is64Bit() &&
6889Op.getOperand(1).getValueType() == MVT::i32) {
6906return lowerVectorFPExtendOrRoundLike(
Op, DAG);
6909return lowerStrictFPExtendOrRoundLike(
Op, DAG);
6912if (
Op.getValueType().isVector() &&
6913 ((
Op.getValueType().getScalarType() == MVT::f16 &&
6916Op.getValueType().getScalarType() == MVT::bf16)) {
6932 Op1.getValueType().isVector() &&
6933 ((Op1.getValueType().getScalarType() == MVT::f16 &&
6936 Op1.getValueType().getScalarType() == MVT::bf16)) {
6942 Op1.getValueType().getVectorElementCount());
6945return DAG.
getNode(
Op.getOpcode(),
DL,
Op.getValueType(), WidenVec);
6952// RVV can only do fp<->int conversions to types half/double the size as 6953// the source. We custom-lower any conversions that do two hops into 6955MVT VT =
Op.getSimpleValueType();
6958bool IsStrict =
Op->isStrictFPOpcode();
6959SDValue Src =
Op.getOperand(0 + IsStrict);
6960MVT SrcVT = Src.getSimpleValueType();
6971"Unexpected vector element types");
6974// Widening conversions 6975if (EltSize > (2 * SrcEltSize)) {
6977// Do a regular integer sign/zero extension then convert to float. 6987Op.getOperand(0), Ext);
6991assert(SrcEltVT == MVT::f16 &&
"Unexpected FP_TO_[US]INT lowering");
6992// Do one doubling fp_extend then complete the operation by converting 6998return DAG.
getNode(
Op.getOpcode(),
DL,
Op->getVTList(), Chain, FExt);
7004// Narrowing conversions 7005if (SrcEltSize > (2 * EltSize)) {
7007// One narrowing int_to_fp, then an fp_round. 7008assert(EltVT == MVT::f16 &&
"Unexpected [US]_TO_FP lowering");
7013Op.getOperand(0), Src);
7021// One narrowing fp_to_int, then truncate the integer. If the float isn't 7022// representable by the integer, the result is poison. 7028Op.getOperand(0), Src);
7036// Scalable vectors can exit here. Patterns will handle equally-sized 7037// conversions halving/doubling ones. 7041// For fixed-length vectors we lower to a custom "VL" node. 7043switch (
Op.getOpcode()) {
7075"Expected same element count");
7082Op.getOperand(0), Src, Mask, VL);
7086 Src = DAG.
getNode(RVVOpc,
DL, ContainerVT, Src, Mask, VL);
7093// Custom lower to ensure the libcall return is passed in an FPR on hard 7108MVT VT =
Op.getSimpleValueType();
7116// fp_extend if the target VT is bigger than f32. 7123// Custom lower to ensure the libcall return is passed in an FPR on hard 7128bool IsStrict =
Op->isStrictFPOpcode();
7129SDValue Op0 = IsStrict ?
Op.getOperand(1) :
Op.getOperand(0);
7133 std::tie(Res, Chain) =
7144// Custom lower to ensure the libcall argument is passed in an FPR on hard 7149bool IsStrict =
Op->isStrictFPOpcode();
7150SDValue Op0 = IsStrict ?
Op.getOperand(1) :
Op.getOperand(0);
7156 std::tie(Res, Chain) =
makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg,
7157 CallOptions,
DL, Chain);
7174if (
Op.getValueType().isVector())
7179assert(
Op.getOperand(0).getValueType() == MVT::f16 &&
7180"Unexpected custom legalisation");
7183return DAG.
getNode(
Op.getOpcode(),
DL,
Op.getValueType(), Ext);
7189assert(
Op.getOperand(1).getValueType() == MVT::f16 &&
7190"Unexpected custom legalisation");
7193 {
Op.getOperand(0),
Op.getOperand(1)});
7194return DAG.
getNode(
Op.getOpcode(),
DL, {Op.getValueType(), MVT::Other},
7195 {Ext.getValue(1), Ext.getValue(0)});
7202return lowerVECREDUCE(
Op, DAG);
7206if (
Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
7207return lowerVectorMaskVecReduction(
Op, DAG,
/*IsVP*/false);
7208return lowerVECREDUCE(
Op, DAG);
7215return lowerFPVECREDUCE(
Op, DAG);
7216case ISD::VP_REDUCE_ADD:
7217case ISD::VP_REDUCE_UMAX:
7218case ISD::VP_REDUCE_SMAX:
7219case ISD::VP_REDUCE_UMIN:
7220case ISD::VP_REDUCE_SMIN:
7221case ISD::VP_REDUCE_FADD:
7222case ISD::VP_REDUCE_SEQ_FADD:
7223case ISD::VP_REDUCE_FMIN:
7224case ISD::VP_REDUCE_FMAX:
7225case ISD::VP_REDUCE_FMINIMUM:
7226case ISD::VP_REDUCE_FMAXIMUM:
7229return lowerVPREDUCE(
Op, DAG);
7230case ISD::VP_REDUCE_AND:
7231case ISD::VP_REDUCE_OR:
7232case ISD::VP_REDUCE_XOR:
7233if (
Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
7234return lowerVectorMaskVecReduction(
Op, DAG,
/*IsVP*/true);
7235return lowerVPREDUCE(
Op, DAG);
7236case ISD::VP_CTTZ_ELTS:
7237case ISD::VP_CTTZ_ELTS_ZERO_UNDEF:
7238return lowerVPCttzElements(
Op, DAG);
                             DAG.getUNDEF(ContainerVT), DAG, Subtarget);
    return lowerINSERT_SUBVECTOR(Op, DAG);
    return lowerEXTRACT_SUBVECTOR(Op, DAG);
    return lowerVECTOR_DEINTERLEAVE(Op, DAG);
    return lowerVECTOR_INTERLEAVE(Op, DAG);
    return lowerSTEP_VECTOR(Op, DAG);
    return lowerVECTOR_REVERSE(Op, DAG);
    return lowerVECTOR_SPLICE(Op, DAG);
    MVT VT = Op.getSimpleValueType();
    if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) ||
        EltVT == MVT::bf16) {
      if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
          (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin()))
    if (EltVT == MVT::i1)
      return lowerVectorMaskSplat(Op, DAG);
    // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
    // better than going through the stack, as the default expansion does.
    MVT VT = Op.getSimpleValueType();
    MVT ContainerVT = VT;
    // Recursively split concat_vectors with more than 2 operands:
    //
    //   concat_vector op1, op2, op3, op4
    // ->
    //   concat_vector (concat_vector op1, op2), (concat_vector op3, op4)
    //
    // This reduces the length of the chain of vslideups and allows us to
    // perform the vslideups at a smaller LMUL, limited to MF2.
                                 Op->ops().take_front(HalfNumOps));
                                 Op->ops().drop_front(HalfNumOps));
        Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
      SDValue SubVec = OpIdx.value();
      // Don't insert undef subvectors.
Op);
7329EVT VecTy = Load->getMemoryVT();
7330// Handle normal vector tuple load. 7336unsigned NumElts = Sz / (NF * 8);
7337int Log2LMUL =
Log2_64(NumElts) - 3;
7340 Flag.setNoUnsignedWrap(
true);
7342SDValue BasePtr = Load->getBasePtr();
7349// Load NF vector registers and combine them to a vector tuple. 7350for (
unsigned i = 0; i < NF; ++i) {
7363if (
auto V = expandUnalignedRVVLoad(
Op, DAG))
7365if (
Op.getValueType().isFixedLengthVector())
7366return lowerFixedLengthVectorLoadToRVV(
Op, DAG);
    auto *Store = cast<StoreSDNode>(Op);
    SDValue StoredVal = Store->getValue();
    // Handle normal vector tuple store.
      unsigned NumElts = Sz / (NF * 8);
      int Log2LMUL = Log2_64(NumElts) - 3;
      Flag.setNoUnsignedWrap(true);
      SDValue Chain = Store->getChain();
      SDValue BasePtr = Store->getBasePtr();
      // Extract subregisters in a vector tuple and store them individually.
      for (unsigned i = 0; i < NF; ++i) {
        Ret = DAG.getStore(Chain, DL, Extract, BasePtr,
                           Store->getOriginalAlign(),
                           Store->getMemOperand()->getFlags());
        Chain = Ret.getValue(0);
    if (auto V = expandUnalignedRVVStore(Op, DAG))
    if (Op.getOperand(1).getValueType().isFixedLengthVector())
      return lowerFixedLengthVectorStoreToRVV(Op, DAG);
    return lowerMaskedLoad(Op, DAG);
    return lowerMaskedStore(Op, DAG);
    return lowerVectorCompress(Op, DAG);
    // This occurs because we custom legalize SETGT and SETUGT for setcc. That
    // causes LegalizeDAG to think we need to custom legalize select_cc. Expand
    // into separate SETCC+SELECT just like LegalizeDAG.
    EVT VT = Op.getValueType();
    MVT OpVT = Op.getOperand(0).getSimpleValueType();
    MVT VT = Op.getSimpleValueType();
           "Unexpected CondCode");
    // If the RHS is a constant in the range [-2049, 0) or (0, 2046], we can
    // convert this to the equivalent of (set(u)ge X, C+1) by using
    // (xori (slti(u) X, C+1), 1). This avoids materializing a small constant
    // into a register.
    if (isa<ConstantSDNode>(RHS)) {
      int64_t Imm = cast<ConstantSDNode>(RHS)->getSExtValue();
      if (Imm != 0 && isInt<12>((uint64_t)Imm + 1)) {
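        // Illustrative sketch (not from the original source): with this trick
        // a compare such as (setgt X, 5) can be emitted as
        //   slti  a1, a0, 6
        //   xori  a1, a1, 1
        // i.e. !(X < 6) == (X >= 6) == (X > 5), with X assumed to be in a0.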
        // If this is an unsigned compare and the constant is -1, incrementing
        // the constant would change behavior. The result should be false.
        // Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT.
    // Not a constant we could handle, swap the operands and condition code to
    return lowerFixedLengthVectorSetccToRVV(Op, DAG);
    return lowerToScalableOp(Op, DAG);
    if (Op.getSimpleValueType().isFixedLengthVector())
      return lowerToScalableOp(Op, DAG);
    // This can be called for an i32 shift amount that needs to be promoted.
    assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16)
    return lowerToScalableOp(Op, DAG);
    EVT VT = Op->getValueType(0);
    // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
    // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
    return lowerABS(Op, DAG);
    if (Subtarget.hasStdExtZvbb())
      return lowerToScalableOp(Op, DAG);
    return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
    return lowerFixedLengthVectorSelectToRVV(Op, DAG);
    if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16)
    return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
    return lowerToScalableOp(Op, DAG);
    return lowerVectorStrictFSetcc(Op, DAG);
    return lowerMaskedGather(Op, DAG);
  case ISD::VP_SCATTER:
    return lowerMaskedScatter(Op, DAG);
    return lowerGET_ROUNDING(Op, DAG);
    return lowerSET_ROUNDING(Op, DAG);
    return lowerEH_DWARF_CFA(Op, DAG);
    if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
      return lowerVPMergeMask(Op, DAG);
  case ISD::VP_UADDSAT:
  case ISD::VP_USUBSAT:
  case ISD::VP_SADDSAT:
  case ISD::VP_SSUBSAT:
    return lowerVPOp(Op, DAG);
    return lowerLogicVPOp(Op, DAG);
  case ISD::VP_FMINNUM:
  case ISD::VP_FMAXNUM:
  case ISD::VP_FCOPYSIGN:
    return lowerVPOp(Op, DAG);
  case ISD::VP_IS_FPCLASS:
    return LowerIS_FPCLASS(Op, DAG);
  case ISD::VP_SIGN_EXTEND:
  case ISD::VP_ZERO_EXTEND:
    if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
      return lowerVPExtMaskOp(Op, DAG);
    return lowerVPOp(Op, DAG);
  case ISD::VP_TRUNCATE:
    return lowerVectorTruncLike(Op, DAG);
  case ISD::VP_FP_EXTEND:
  case ISD::VP_FP_ROUND:
    return lowerVectorFPExtendOrRoundLike(Op, DAG);
  case ISD::VP_SINT_TO_FP:
  case ISD::VP_UINT_TO_FP:
    if (Op.getValueType().isVector() &&
        ((Op.getValueType().getScalarType() == MVT::f16 &&
          Op.getValueType().getScalarType() == MVT::bf16)) {
  case ISD::VP_FP_TO_SINT:
  case ISD::VP_FP_TO_UINT:
        Op1.getValueType().isVector() &&
        ((Op1.getValueType().getScalarType() == MVT::f16 &&
          Op1.getValueType().getScalarType() == MVT::bf16)) {
                             Op1.getValueType().getVectorElementCount());
                          {WidenVec, Op.getOperand(1), Op.getOperand(2)});
    return lowerVPFPIntConvOp(Op, DAG);
    if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
      return lowerVPSetCCMaskOp(Op, DAG);
  case ISD::VP_BITREVERSE:
    return lowerVPOp(Op, DAG);
  case ISD::VP_CTLZ_ZERO_UNDEF:
    if (Subtarget.hasStdExtZvbb())
      return lowerVPOp(Op, DAG);
    return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
  case ISD::VP_CTTZ_ZERO_UNDEF:
    if (Subtarget.hasStdExtZvbb())
      return lowerVPOp(Op, DAG);
    return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
    return lowerVPOp(Op, DAG);
  case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
    return lowerVPStridedLoad(Op, DAG);
  case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
    return lowerVPStridedStore(Op, DAG);
  case ISD::VP_FNEARBYINT:
  case ISD::VP_FROUNDEVEN:
  case ISD::VP_FROUNDTOZERO:
  case ISD::VP_FMAXIMUM:
  case ISD::VP_FMINIMUM:
  case ISD::EXPERIMENTAL_VP_SPLICE:
    return lowerVPSpliceExperimental(Op, DAG);
  case ISD::EXPERIMENTAL_VP_REVERSE:
    return lowerVPReverseExperimental(Op, DAG);
  case ISD::EXPERIMENTAL_VP_SPLAT:
    return lowerVPSplatExperimental(Op, DAG);
           "llvm.clear_cache only needs custom lower on Linux targets");
    return emitFlushICache(DAG, Op.getOperand(0), Op.getOperand(1),
                           Op.getOperand(2), Flags, DL);
    return lowerDYNAMIC_STACKALLOC(Op, DAG);
    return lowerINIT_TRAMPOLINE(Op, DAG);
    return lowerADJUST_TRAMPOLINE(Op, DAG);
  MakeLibCallOptions CallOptions;
  std::pair<SDValue, SDValue> CallResult =
      makeLibCall(DAG, RTLIB::RISCV_FLUSH_ICACHE, MVT::isVoid,
                  {Start, End, Flags}, CallOptions, DL, InChain);
  // This function returns void so only the out chain matters.
  return CallResult.second;
  // Create an MCCodeEmitter to encode instructions.
  std::unique_ptr<MCCodeEmitter> CodeEmitter(
  SDValue Trmp = Op.getOperand(1); // trampoline
  const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
  // We store in the trampoline buffer the following instructions and data.
  // 16: <StaticChainOffset>
  // 24: <FunctionAddressOffset>
  constexpr unsigned StaticChainOffset = 16;
  constexpr unsigned FunctionAddressOffset = 24;
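  // For reference, the fixed part of the trampoline built below is expected
  // to look roughly like this (a sketch, not verbatim emitted text):
  //   auipc t2, 0        # t2 <- address of the trampoline itself
  //   ld    t0, 24(t2)   # load <FunctionAddressOffset>
  //   ld    t2, 16(t2)   # load <StaticChainOffset> into the static chain reg
  //   jr    t0           # jump to the nested function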
  auto GetEncoding = [&](const MCInst &MC) {
    CodeEmitter->encodeInstruction(MC, CB, Fixups, *STI);
      // Loads the current PC into t2.
      GetEncoding(MCInstBuilder(RISCV::AUIPC).addReg(RISCV::X7).addImm(0)),
      // Loads the function address into t0. Note that we are using offsets
      // pc-relative to the first instruction of the trampoline.
          MCInstBuilder(RISCV::LD).addReg(RISCV::X5).addReg(RISCV::X7).addImm(
              FunctionAddressOffset)),
      // Load the value of the static chain.
          MCInstBuilder(RISCV::LD).addReg(RISCV::X7).addReg(RISCV::X7).addImm(
              StaticChainOffset)),
      // Jump to the function.
  // Store encoded instructions.
  // Now store the variable part of the trampoline.
  SDValue FunctionAddress = Op.getOperand(2);
  // Store the given static chain and function pointer in the trampoline buffer.
  struct OffsetValuePair {
  } OffsetValues[] = {
      {StaticChainOffset, StaticChain},
      {FunctionAddressOffset, FunctionAddress},
                    DAG.getConstant(OffsetValue.Offset, dl, MVT::i64));
    OffsetValue.Addr = Addr;
    OutChains[Idx + 4] =
  // The end of instructions of trampoline is the same as the static chain
  // address that we computed earlier.
  SDValue EndOfTrmp = OffsetValues[0].Addr;
  // Call clear cache on the trampoline instructions.
  return Op.getOperand(0);
                                 N->getOffset(), Flags);
template <class NodeTy>
                                     bool IsLocal, bool IsExternWeak) const {
  // When HWASAN is used and tagging of global variables is enabled
  // they should be accessed via the GOT, since the tagged address of a global
  // is incompatible with existing code models. This also applies to non-pic
    if (IsLocal && !Subtarget.allowTaggedGlobals())
      // Use PC-relative addressing to access the symbol. This generates the
      // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
      // %pcrel_lo(auipc)).
    // Use PC-relative addressing to access the GOT for this symbol, then load
    // the address from the GOT. This generates the pattern (PseudoLGA sym),
    // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
    // Generate a sequence for accessing addresses within the first 2 GiB of
    // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
    // An extern weak symbol may be undefined, i.e. have value 0, which may
    // not be within 2GiB of PC, so use GOT-indirect addressing to access the
    // symbol. This generates the pattern (PseudoLGA sym), which expands to
    // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
    // Generate a sequence for accessing addresses within any 2GiB range within
    // the address space. This generates the pattern (PseudoLLA sym), which
    // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
  // Using pc-relative mode for other node type.
  assert(N->getOffset() == 0 && "unexpected offset in global node");
  return getAddr(N, DAG);
  return getAddr(N, DAG);
  return getAddr(N, DAG);
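// For example, the PC-relative (PseudoLLA) form described above is expected
// to assemble to roughly the following (a sketch; label name illustrative):
//   .Lpcrel_hi0:
//     auipc a0, %pcrel_hi(sym)
//     addi  a0, a0, %pcrel_lo(.Lpcrel_hi0)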
  // Use PC-relative addressing to access the GOT for this TLS symbol, then
  // load the address from the GOT and add the thread pointer. This generates
  // the pattern (PseudoLA_TLS_IE sym), which expands to
  // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
  // Add the thread pointer.
  // Generate a sequence for accessing the address relative to the thread
  // pointer, with the appropriate adjustment for the thread pointer offset.
  // This generates the pattern
  // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
  // Use a PC-relative addressing mode to access the global dynamic GOT address.
  // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
  // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
  // Prepare argument list to generate call.
  Args.push_back(Entry);
  // Setup call to __tls_get_addr.
  // Use a PC-relative addressing mode to access the global dynamic GOT address.
  // This generates the pattern (PseudoLA_TLSDESC sym), which expands to
  //
  // auipc tX, %tlsdesc_hi(symbol)         // R_RISCV_TLSDESC_HI20(symbol)
  // lw    tY, tX, %tlsdesc_load_lo(label) // R_RISCV_TLSDESC_LOAD_LO12(label)
  // addi  a0, tX, %tlsdesc_add_lo(label)  // R_RISCV_TLSDESC_ADD_LO12(label)
  // jalr  t0, tY                          // R_RISCV_TLSDESC_CALL(label)
  assert(N->getOffset() == 0 && "unexpected offset in global node");
  Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
  Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
             : getDynamicTLSAddr(N, DAG);
// Return true if Val is equal to (setcc LHS, RHS, CC).
// Return false if Val is the inverse of (setcc LHS, RHS, CC).
// Otherwise, return std::nullopt.
  if (LHS == LHS2 && RHS == RHS2) {
  } else if (LHS == RHS2 && RHS == LHS2) {
  MVT VT = N->getSimpleValueType(0);
  // (select c, -1, y) -> -c | y
  // (select c, y, -1) -> (c-1) | y
  // (select c, 0, y) -> (c-1) & y
  // (select c, y, 0) -> -c & y
  // select c, ~x, x --> xor -c, x
  if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
    if (~TrueVal == FalseVal) {
  // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
  // when both truev and falsev are also setcc.
    // (select x, x, y) -> x | y
    // (select !x, x, y) -> x & y
    // (select x, y, x) -> x & y
    // (select !x, y, x) -> x | y
// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
// being `0` or `-1`. In such cases we can replace `select` with `and`.
// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
  if (Subtarget.hasShortForwardBranchOpt())
  unsigned SelOpNo = 0;
  unsigned ConstSelOpNo = 1;
  unsigned OtherSelOpNo = 2;
  if (!dyn_cast<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
  ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
  if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
  ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
  if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
  SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
    std::swap(NewConstOps[0], NewConstOps[1]);
  SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
    std::swap(NewNonConstOps[0], NewNonConstOps[1]);
  SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
  SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
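  // Illustrative example of the fold above (values made up): for
  //   (and (select cond, x, 0xff00), 0xff)
  // the constant arm folds to (and 0xff00, 0xff) == 0, so the expression
  // becomes (select cond, (and x, 0xff), 0), which can then be lowered
  // without a branch as an AND with the condition mask.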
  MVT VT = Op.getSimpleValueType();
  // Lower vector SELECTs to VSELECTs by splatting the condition.
  // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ
  // nodes to implement the SELECT. Performing the lowering here allows for
  // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless
  // sequence or RISCVISD::SELECT_CC node (branch-based select).
  if ((Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) &&
    // (select c, t, 0) -> (czero_eqz t, c)
    // (select c, 0, f) -> (czero_nez f, c)
    // (select c, (and f, x), f) -> (or (and f, x), (czero_nez f, c))
    // (select c, t, (and t, x)) -> (or (czero_eqz t, c), (and t, x))
    // Try some other optimizations before falling back to generic lowering.
    // (select c, c1, c2) -> (add (czero_nez c2 - c1, c), c1)
    // (select c, c1, c2) -> (add (czero_eqz c1 - c2, c), c2)
    if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
          TrueVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
          FalseVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
      bool IsCZERO_NEZ = TrueValCost <= FalseValCost;
          IsCZERO_NEZ ? FalseVal - TrueVal : TrueVal - FalseVal, DL, VT);
                                 DL, VT, LHSVal, CondV);
    // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
    // Unless we have the short forward branch optimization.
  if (Op.hasOneUse()) {
    unsigned UseOpc = Op->user_begin()->getOpcode();
      // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
      // may return a constant node and cause crash in lowerSELECT.
      return lowerSELECT(NewSel, DAG);
  // (select cc, 1.0, 0.0) -> (sint_to_fp (zext cc))
  // (select cc, 0.0, 1.0) -> (sint_to_fp (zext (xor cc, 1)))
  // If the condition is not an integer SETCC which operates on XLenVT, we need
  // to emit a RISCVISD::SELECT_CC comparing the condition to zero. i.e.:
  // (select condv, truev, falsev)
  // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
  SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
  // If the CondV is the output of a SETCC node which operates on XLenVT inputs,
  // then merge the SETCC node into the lowered RISCVISD::SELECT_CC to take
  // advantage of the integer compare+branch instructions. i.e.:
  // (select (setcc lhs, rhs, cc), truev, falsev)
  // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
  // Special case for a select of 2 constants that have a difference of 1.
  // Normally this is done by DAGCombine, but if the select is introduced by
  // type legalization or op legalization, we miss it. Restricting to SETLT
  // case for now because that is what signed saturating add/sub need.
  // FIXME: We don't need the condition to be SETLT or even a SETCC,
  // but we would probably want to swap the true/false values if the condition
  // is SETGE/SETLE to avoid an XORI.
  if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
    if (TrueVal - 1 == FalseVal)
    if (TrueVal + 1 == FalseVal)
  // 1 < x ? x : 1 -> 0 < x ? x : 1
      RHS == TrueV && LHS == FalseV) {
    // 0 <u x is the same as x != 0.
  // x <s -1 ? x : -1 -> x <s 0 ? x : -1
  if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
    // (select (setcc lhs, rhs, CC), constant, falsev)
    // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
                        LHS, RHS, TargetCC, Op.getOperand(2));
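  // A hypothetical Zicond lowering of (select c, t, f) per the patterns above
  // (register assignments illustrative):
  //   czero.eqz a3, a1, a0   # a3 = (c != 0) ? t : 0
  //   czero.nez a4, a2, a0   # a4 = (c != 0) ? 0 : f
  //   or        a0, a3, a4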
  // vastart just stores the address of the VarArgsFrameIndex slot into the
  // memory location argument.
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  int XLenInBytes = Subtarget.getXLen() / 8;
  EVT VT = Op.getValueType();
  unsigned Depth = Op.getConstantOperandVal(0);
    int Offset = -(XLenInBytes * 2);
  int XLenInBytes = Subtarget.getXLen() / 8;
  EVT VT = Op.getValueType();
  unsigned Depth = Op.getConstantOperandVal(0);
    int Off = -XLenInBytes;
    SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
  // Return the value of the return address register, marking it an implicit
  EVT VT = Lo.getValueType();
  // SHL expansion:
  //   if Shamt-XLEN < 0: // Shamt < XLEN
  //     Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
  //     Hi = Lo << (Shamt-XLEN)
  EVT VT = Lo.getValueType();
  // SRA expansion:
  //   if Shamt-XLEN < 0: // Shamt < XLEN
  //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
  //     Lo = Hi >>s (Shamt-XLEN);
  //     Hi = Hi >>s (XLEN-1)
  // SRL expansion:
  //   if Shamt-XLEN < 0: // Shamt < XLEN
  //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
  //     Lo = Hi >>u (Shamt-XLEN);
  // Lower splats of i1 types to SETCC. For each mask vector type, we have a
  // legal equivalently-sized i8 type, so we can use that as a go-between.
  MVT VT = Op.getSimpleValueType();
  // All-zeros or all-ones splats are handled specially.
// Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
// illegal (currently only vXi64 RV32).
// FIXME: We could also catch non-constant sign-extended i32 values and lower
// them to VMV_V_X_VL.
  MVT VecVT = Op.getSimpleValueType();
         "Unexpected SPLAT_VECTOR_PARTS lowering");
  MVT ContainerVT = VecVT;
// Custom-lower extensions from mask vectors by using a vselect either with 1
// for zero/any-extension or -1 for sign-extension:
//   (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
// Note that any-extension is lowered identically to zero-extension.
                                                int64_t ExtTrueVal) const {
  MVT VecVT = Op.getSimpleValueType();
  // Only custom-lower extensions from mask types
  assert(Src.getValueType().isVector() &&
         Src.getValueType().getVectorElementType() == MVT::i1);
                       DAG.getUNDEF(ContainerVT), SplatZero, VL);
                       DAG.getUNDEF(ContainerVT), SplatTrueVal, VL);
                         SplatZero, DAG.getUNDEF(ContainerVT), VL);
SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
  MVT ExtVT = Op.getSimpleValueType();
  // Only custom-lower extensions from fixed-length vector types.
  MVT VT = Op.getOperand(0).getSimpleValueType();
  // Grab the canonical container type for the extended type. Infer the smaller
  // type from that to ensure the same number of vector elements, as we know
  // the LMUL will be sufficient to hold the smaller type.
  // Get the extended container type manually to ensure the same number of
  // vector elements between source and dest.
// Custom-lower truncations from vectors to mask vectors by using a mask and a
//   (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
  bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
  EVT MaskVT = Op.getValueType();
  // Only expect to custom-lower truncations to mask types
         "Unexpected type for vector mask lowering");
  MVT VecVT = Src.getSimpleValueType();
    VL = Op.getOperand(2);
  // If this is a fixed vector, we need to convert it to a scalable vector.
  MVT ContainerVT = VecVT;
  MVT MaskContainerVT =
    std::tie(Mask, VL) =
                          DAG.getUNDEF(ContainerVT), SplatOne, VL);
                          DAG.getUNDEF(ContainerVT), SplatZero, VL);
                      DAG.getUNDEF(ContainerVT), Mask, VL);
                     DAG.getUNDEF(MaskContainerVT), Mask, VL});
  unsigned Opc = Op.getOpcode();
  bool IsVPTrunc = Opc == ISD::VP_TRUNCATE;
  MVT VT = Op.getSimpleValueType();
  // Only custom-lower vector truncates
  assert(VT.isVector() && "Unexpected type for vector truncate lowering");
  // Truncates to mask types are handled differently
    return lowerVectorMaskTruncLike(Op, DAG);
  // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
  // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
  // truncate by one power of two at a time.
  MVT SrcVT = Src.getSimpleValueType();
         "Unexpected vector truncate lowering");
  MVT ContainerVT = SrcVT;
    VL = Op.getOperand(2);
    std::tie(Mask, VL) =
  } while (SrcEltVT != DstEltVT);
RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValue Op,
  MVT VT = Op.getSimpleValueType();
  MVT SrcVT = Src.getSimpleValueType();
  MVT ContainerVT = VT;
  // RVV can only widen/truncate fp to types double/half the size as the source.
  // For double rounding, the intermediate rounding should be round-to-odd.
                      Chain, Src, Mask, VL);
    Chain = Src.getValue(1);
                    Chain, Src, Mask, VL);
  // StrictFP operations have two result values. Their lowered result should
  // have same result count.
RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
                   Op.getOpcode() == ISD::VP_FP_ROUND ||
                   Op.getOpcode() == ISD::VP_FP_EXTEND;
  // RVV can only do truncate fp to types half the size as the source. We
  // custom-lower f64->f16 rounds via RVV's round-to-odd float
  // conversion instruction.
  MVT VT = Op.getSimpleValueType();
  assert(VT.isVector() && "Unexpected type for vector truncate lowering");
  MVT SrcVT = Src.getSimpleValueType();
  bool IsDirectExtend =
  bool IsDirectConv = IsDirectExtend || IsDirectTrunc;
  // Prepare any fixed-length vector operands.
  MVT ContainerVT = VT;
    VL = Op.getOperand(2);
    std::tie(Mask, VL) =
    Src = DAG.getNode(ConvOpc, DL, ContainerVT, Src, Mask, VL);
  unsigned InterConvOpc =
      DAG.getNode(InterConvOpc, DL, InterVT, Src, Mask, VL);
      DAG.getNode(ConvOpc, DL, ContainerVT, IntermediateConv, Mask, VL);
// Given a scalable vector type and an index into it, returns the type for the
// smallest subvector that the index fits in. This can be used to reduce LMUL
// for operations like vslidedown.
//
// E.g. With Zvl128b, index 3 in a nxv4i32 fits within the first nxv2i32.
static std::optional<MVT>
  const unsigned MinVLMAX = VectorBitsMin / EltSize;
  if (MaxIdx < MinVLMAX)
  else if (MaxIdx < MinVLMAX * 2)
  else if (MaxIdx < MinVLMAX * 4)
// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
// first position of a vector, and that vector is slid up to the insert index.
// By limiting the active vector length to index+1 and merging with the
// original vector (with an undisturbed tail policy for elements >= VL), we
// achieve the desired result of leaving all elements untouched except the one
// at VL-1, which is replaced with the desired value.
  MVT VecVT = Op.getSimpleValueType();
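  // A sketch of the expected sequence for inserting a scalar into element 2
  // of an e32 vector (registers and vtype are illustrative):
  //   vsetivli    zero, 3, e32, m1, tu, ma   # VL = index + 1, tail undisturbed
  //   vmv.s.x     v9, a0                     # scalar into element 0 of v9
  //   vslideup.vi v8, v9, 2                  # slide it up to the insert index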
  // FIXME: For now we just promote to an i8 vector and insert into that,
  // but this is probably not optimal.
      ValVT == MVT::bf16) {
    // If we don't have vfmv.s.f for f16/bf16, use fmv.x.h first.
  MVT ContainerVT = VecVT;
  // If the operand is a fixed-length vector, convert to a scalable one.
  // If we know the index we're going to insert at, we can shrink Vec so that
  // we're performing the scalar inserts and slideup on a smaller LMUL.
  MVT OrigContainerVT = ContainerVT;
  if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx)) {
    const unsigned OrigIdx = IdxC->getZExtValue();
    // Do we know an upper bound on LMUL?
                                      DL, DAG, Subtarget)) {
      ContainerVT = *ShrunkVT;
    // If we're compiling for an exact VLEN value, we can always perform
    // the insert in m1 as we can determine the register corresponding to
    // the index in the register group.
        VLEN && ContainerVT.bitsGT(M1VT)) {
      unsigned RemIdx = OrigIdx % ElemsPerVReg;
      unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
      unsigned ExtractIdx =
  // Even i64-element vectors on RV32 can be lowered without scalar
  // legalization if the most-significant 32 bits of the value are not affected
  // by the sign-extension of the lower 32 bits.
  // TODO: We could also catch sign extensions of a 32-bit value.
  if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
    const auto *CVal = cast<ConstantSDNode>(Val);
    if (isInt<32>(CVal->getSExtValue())) {
      IsLegalInsert = true;
    Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
  // On RV32, i64-element vectors must be specially handled to place the
  // value at element 0, by using two vslide1down instructions in sequence on
  // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
  std::tie(ValLo, ValHi) = DAG.SplitScalar(Val, DL, MVT::i32, MVT::i32);
  // Limit the active VL to two.
  // If the Idx is 0 we can insert directly into the vector.
    // First slide in the lo value, then the hi in above it. We use slide1down
    // to avoid the register group overlap constraint of vslide1up.
                           Vec, Vec, ValLo, I32Mask, InsertI64VL);
    // If the source vector is undef don't pass along the tail elements from
    // the previous slide1down.
                           Tail, ValInVec, ValHi, I32Mask, InsertI64VL);
    // Bitcast back to the right container type.
    ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
                        ValInVec, AlignedIdx);
    // First slide in the lo value, then the hi in above it. We use slide1down
    // to avoid the register group overlap constraint of vslide1up.
                           DAG.getUNDEF(I32ContainerVT), ValLo,
                           I32Mask, InsertI64VL);
                           DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi,
                           I32Mask, InsertI64VL);
    // Bitcast back to the right container type.
    ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
  // Now that the value is in a vector, slide it into position.
  // Use tail agnostic policy if Idx is the last index of Vec.
                              Idx, Mask, InsertVL, Policy);
                        Slideup, AlignedIdx);
// Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
// extract the first element: (extractelt (slidedown vec, idx), 0). For integer
// types this is done using VMV_X_S to allow us to glean information about the
// sign bits of the result.
  EVT EltVT = Op.getValueType();
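  // A sketch of the expected sequence for extracting element 3 of an e32
  // integer vector (registers and vtype are illustrative):
  //   vsetivli      zero, 1, e32, m1, ta, ma
  //   vslidedown.vi v9, v8, 3
  //   vmv.x.s       a0, v9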
  // Use vfirst.m to extract the first bit.
  MVT ContainerVT = VecVT;
    unsigned WidenVecLen;
    unsigned MaxEEW = Subtarget.getELen();
           "the number of elements should be power of 2");
      WideEltVT = LargestEltVT;
    // extract element index = index / element width
    ExtractElementIdx = DAG.getNode(
    // mask bit index = index % element width
                      Vec, ExtractElementIdx);
    // Extract the bit from GPR.
  // Otherwise, promote to an i8 vector and extract from that.
      EltVT == MVT::bf16) {
    // If we don't have vfmv.f.s for f16/bf16, extract to a gpr then use fmv.h.x
  // If this is a fixed vector, we need to convert it to a scalable vector.
  MVT ContainerVT = VecVT;
  // If we're compiling for an exact VLEN value and we have a known
  // constant index, we can always perform the extract in m1 (or
  // smaller) as we can determine the register corresponding to
  // the index in the register group.
  if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
      IdxC && VLen && VecVT.getSizeInBits().getKnownMinValue() > *VLen) {
    unsigned OrigIdx = IdxC->getZExtValue();
    unsigned RemIdx = OrigIdx % ElemsPerVReg;
    unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
    unsigned ExtractIdx =
  // Reduce the LMUL of our slidedown and vmv.x.s to the smallest LMUL which
  // contains our index.
  std::optional<uint64_t> MaxIdx;
  if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx))
    MaxIdx = IdxC->getZExtValue();
    ContainerVT = *SmallerVT;
  // If after narrowing, the required slide is still greater than LMUL2,
  // fallback to generic expansion and go through the stack. This is done
  // for a subtle reason: extracting *all* elements out of a vector is
  // widely expected to be linear in vector size, but because vslidedown
  // is linear in LMUL, performing N extracts using vslidedown becomes
  // O(n^2) / (VLEN/ETYPE) work. On the surface, going through the stack
  // seems to have the same problem (the store is linear in LMUL), but the
  // generic expansion *memoizes* the store, and thus for many extracts of
  // the same vector we end up with one store and a bunch of loads.
  // TODO: We don't have the same code for insert_vector_elt because we
  // have BUILD_VECTOR and handle the degenerate case there. Should we
  // consider adding an inverse BUILD_VECTOR node?
  // If the index is 0, the vector is already in the right position.
  // Use a VL of 1 to avoid processing more elements than we need.
  // Floating-point extracts are handled in TableGen.
// Some RVV intrinsics may claim that they want an integer operand to be
// promoted or expanded.
         "Unexpected opcode");
  unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
      RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
  if (!II || !II->hasScalarOperand())
  unsigned SplatOp = II->ScalarOperand + 1 + HasChain;
  // If this isn't a scalar, or its type is XLenVT we're done.
  // Simplest case is that the operand needs to be promoted to XLenVT.
  if (OpVT.bitsLT(XLenVT)) {
    // If the operand is a constant, sign extend to increase our chances
    // of being able to use a .vi instruction. ANY_EXTEND would become
    // a zero extend and the simm5 check in isel would fail.
    // FIXME: Should we ignore the upper bits in isel instead?
    ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
  // Use the previous operand to get the vXi64 VT. The result might be a mask
  // VT for compares. Using the previous operand assumes that the previous
  // operand will never have a smaller element size than a scalar operand and
  // that a widening operation never uses SEW=64.
  // NOTE: If this fails the below assert, we can probably just find the
  // element count from any operand or result and use it to construct the VT.
  assert(II->ScalarOperand > 0 && "Unexpected splat operand!");
  MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();
  // The more complex case is when the scalar is larger than XLenVT.
  assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
  // If this is a sign-extended 32-bit value, we can truncate it and rely on the
  // instruction to sign-extend since SEW>XLEN.
  case Intrinsic::riscv_vslide1up:
  case Intrinsic::riscv_vslide1down:
  case Intrinsic::riscv_vslide1up_mask:
  case Intrinsic::riscv_vslide1down_mask: {
    // We need to special case these when the scalar is larger than XLen.
    bool IsMasked = NumOps == 7;
    // Convert the vector source to the equivalent nxvXi32 vector.
    std::tie(ScalarLo, ScalarHi) =
    // Double the VL since we halved SEW.
    // Optimize for constant AVL
    if (isa<ConstantSDNode>(AVL)) {
      const auto [MinVLMAX, MaxVLMAX] =
      if (AVLInt <= MinVLMAX) {
      } else if (AVLInt >= 2 * MaxVLMAX) {
        // Just set vl to VLMAX in this situation
        // For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl
        // is related to the hardware implementation.
        // So let the following code handle
    // Using vsetvli instruction to get actually used length which related to
    // the hardware implementation
    // Shift the two scalar parts in using SEW=32 slide1up/slide1down
    if (IntNo == Intrinsic::riscv_vslide1up ||
        IntNo == Intrinsic::riscv_vslide1up_mask) {
                          ScalarHi, I32Mask, I32VL);
                          ScalarLo, I32Mask, I32VL);
                          ScalarLo, I32Mask, I32VL);
                          ScalarHi, I32Mask, I32VL);
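    // For a 64-bit scalar on RV32, the net effect of the SEW=32 path above is
    // expected to be roughly (a sketch, registers illustrative):
    //   vsetvli        zero, a2, e32, m2, ta, ma   # doubled VL, halved SEW
    //   vslide1down.vx v8, v8, a0                  # shift in the low 32 bits
    //   vslide1down.vx v8, v8, a1                  # then the high 32 bits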
    // Convert back to nxvXi64.
    // Apply mask after the operation.
    // Assume Policy operand is the last operand.
    // We don't need to select maskedoff if it's undef.
    // TUMA or TUMU: Currently we always emit tumu policy regardless of tuma.
    // It's fine because vmerge does not care mask policy.
  // We need to convert the scalar to a splat vector.
// Lower the llvm.get.vector.length intrinsic to vsetvli. We only support
// scalable vector llvm.get.vector.length for now.
//
// We need to convert from a scalable VF to a vsetvli with VLMax equal to
// (vscale * VF). The vscale and VF are independent of element width. We use
// SEW=8 for the vsetvli because it is the only element width that supports all
// fractional LMULs. The LMUL is chosen so that with SEW=8 the VLMax is
// (vscale * VF). Where vscale is defined as VLEN/RVVBitsPerBlock. The
// InsertVSETVLI pass can fix up the vtype of the vsetvli if a different
// SEW and LMUL are better for the surrounding vector instructions.
  // The smallest LMUL is only valid for the smallest element width.
  const unsigned ElementWidth = 8;
  // Determine the VF that corresponds to LMUL 1 for ElementWidth.
  // We don't support VF==1 with ELEN==32.
  [[maybe_unused]] unsigned MinVF =
  [[maybe_unused]] unsigned VF = N->getConstantOperandVal(2);
  bool Fractional = VF < LMul1VF;
  unsigned LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF;
9677unsigned IntNo =
Op.getConstantOperandVal(HasChain ? 1 : 0);
9681 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
9682if (!
II || !
II->hasScalarOperand())
9685unsigned SplatOp =
II->ScalarOperand + 1;
9692// The code below is partially copied from lowerVectorIntrinsicScalars. 9693// If this isn't a scalar, or its type is XLenVT we're done. 9697// Manually emit promote operation for scalar operation. 9698if (OpVT.
bitsLT(XLenVT)) {
9701 ScalarOp = DAG.
getNode(ExtOpc,
DL, XLenVT, ScalarOp);
9712EVT ValType = V.getValueType();
9713if (ValType.isVector() && ValType.isFloatingPoint()) {
9716 ValType.getVectorElementCount());
9719if (ValType.isFixedLengthVector()) {
9721 DAG, V.getSimpleValueType(), Subtarget);
9727// LMUL * VLEN should be greater than or equal to EGS * SEW 9737unsigned IntNo =
Op.getConstantOperandVal(0);
9743break;
// Don't custom lower most intrinsics. 9744case Intrinsic::riscv_tuple_insert: {
9752case Intrinsic::riscv_tuple_extract: {
9759case Intrinsic::thread_pointer: {
9763case Intrinsic::riscv_orc_b:
9764case Intrinsic::riscv_brev8:
9765case Intrinsic::riscv_sha256sig0:
9766case Intrinsic::riscv_sha256sig1:
9767case Intrinsic::riscv_sha256sum0:
9768case Intrinsic::riscv_sha256sum1:
9769case Intrinsic::riscv_sm3p0:
9770case Intrinsic::riscv_sm3p1: {
9783return DAG.
getNode(Opc,
DL, XLenVT,
Op.getOperand(1));
9785case Intrinsic::riscv_sm4ks:
9786case Intrinsic::riscv_sm4ed: {
9790return DAG.
getNode(Opc,
DL, XLenVT,
Op.getOperand(1),
Op.getOperand(2),
9793case Intrinsic::riscv_zip:
9794case Intrinsic::riscv_unzip: {
9797return DAG.
getNode(Opc,
DL, XLenVT,
Op.getOperand(1));
9799case Intrinsic::riscv_mopr:
9803case Intrinsic::riscv_moprr: {
9805Op.getOperand(2),
Op.getOperand(3));
9807case Intrinsic::riscv_clmul:
9810case Intrinsic::riscv_clmulh:
9811case Intrinsic::riscv_clmulr: {
9814return DAG.
getNode(Opc,
DL, XLenVT,
Op.getOperand(1),
Op.getOperand(2));
9816case Intrinsic::experimental_get_vector_length:
9818case Intrinsic::experimental_cttz_elts:
9820case Intrinsic::riscv_vmv_x_s: {
9824case Intrinsic::riscv_vfmv_f_s:
9827case Intrinsic::riscv_vmv_v_x:
9829Op.getOperand(3),
Op.getSimpleValueType(),
DL, DAG,
9831case Intrinsic::riscv_vfmv_v_f:
9833Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
9834case Intrinsic::riscv_vmv_s_x: {
9837if (
Scalar.getValueType().bitsLE(XLenVT)) {
9840Op.getOperand(1), Scalar,
Op.getOperand(3));
9843assert(
Scalar.getValueType() == MVT::i64 &&
"Unexpected scalar VT!");
9845// This is an i64 value that lives in two scalar registers. We have to 9846// insert this in a convoluted way. First we build vXi64 splat containing 9847// the two values that we assemble using some bit math. Next we'll use 9848// vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask 9849// to merge element 0 from our splat into the source vector. 9850// FIXME: This is probably not the best way to do this, but it is 9851// consistent with INSERT_VECTOR_ELT lowering so it is a good starting 9858// vmseq.vx mMask, vVid, 0 9859// vmerge.vvm vDest, vSrc, vVal, mMask 9860MVT VT =
Op.getSimpleValueType();
9865if (
Op.getOperand(1).isUndef())
9881case Intrinsic::riscv_vfmv_s_f:
9883Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
9884// EGS * EEW >= 128 bits 9885case Intrinsic::riscv_vaesdf_vv:
9886case Intrinsic::riscv_vaesdf_vs:
9887case Intrinsic::riscv_vaesdm_vv:
9888case Intrinsic::riscv_vaesdm_vs:
9889case Intrinsic::riscv_vaesef_vv:
9890case Intrinsic::riscv_vaesef_vs:
9891case Intrinsic::riscv_vaesem_vv:
9892case Intrinsic::riscv_vaesem_vs:
9893case Intrinsic::riscv_vaeskf1:
9894case Intrinsic::riscv_vaeskf2:
9895case Intrinsic::riscv_vaesz_vs:
9896case Intrinsic::riscv_vsm4k:
9897case Intrinsic::riscv_vsm4r_vv:
9898case Intrinsic::riscv_vsm4r_vs: {
9899if (!
isValidEGW(4,
Op.getSimpleValueType(), Subtarget) ||
9900 !
isValidEGW(4,
Op->getOperand(1).getSimpleValueType(), Subtarget) ||
9901 !
isValidEGW(4,
Op->getOperand(2).getSimpleValueType(), Subtarget))
9905// EGS * EEW >= 256 bits 9906case Intrinsic::riscv_vsm3c:
9907case Intrinsic::riscv_vsm3me: {
9908if (!
isValidEGW(8,
Op.getSimpleValueType(), Subtarget) ||
9909 !
isValidEGW(8,
Op->getOperand(1).getSimpleValueType(), Subtarget))
9913// zvknha(SEW=32)/zvknhb(SEW=[32|64]) 9914case Intrinsic::riscv_vsha2ch:
9915case Intrinsic::riscv_vsha2cl:
9916case Intrinsic::riscv_vsha2ms: {
9917if (
Op->getSimpleValueType(0).getScalarSizeInBits() == 64 &&
9918 !Subtarget.hasStdExtZvknhb())
9920if (!
isValidEGW(4,
Op.getSimpleValueType(), Subtarget) ||
9921 !
isValidEGW(4,
Op->getOperand(1).getSimpleValueType(), Subtarget) ||
9922 !
isValidEGW(4,
Op->getOperand(2).getSimpleValueType(), Subtarget))
9926case Intrinsic::riscv_sf_vc_v_x:
9927case Intrinsic::riscv_sf_vc_v_i:
9928case Intrinsic::riscv_sf_vc_v_xv:
9929case Intrinsic::riscv_sf_vc_v_iv:
9930case Intrinsic::riscv_sf_vc_v_vv:
9931case Intrinsic::riscv_sf_vc_v_fv:
9932case Intrinsic::riscv_sf_vc_v_xvv:
9933case Intrinsic::riscv_sf_vc_v_ivv:
9934case Intrinsic::riscv_sf_vc_v_vvv:
9935case Intrinsic::riscv_sf_vc_v_fvv:
9936case Intrinsic::riscv_sf_vc_v_xvw:
9937case Intrinsic::riscv_sf_vc_v_ivw:
9938case Intrinsic::riscv_sf_vc_v_vvw:
9939case Intrinsic::riscv_sf_vc_v_fvw: {
9940MVT VT =
Op.getSimpleValueType();
  MVT VT = Op.getSimpleValueType();
  if (VT.isFloatingPoint()) {
  if (VT.isFixedLengthVector())
  if (VT.isFixedLengthVector())
  if (VT.isFloatingPoint())
  unsigned IntNo = Op.getConstantOperandVal(1);
  case Intrinsic::riscv_seg2_load:
  case Intrinsic::riscv_seg3_load:
  case Intrinsic::riscv_seg4_load:
  case Intrinsic::riscv_seg5_load:
  case Intrinsic::riscv_seg6_load:
  case Intrinsic::riscv_seg7_load:
  case Intrinsic::riscv_seg8_load: {
        Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
        Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
        Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
        Intrinsic::riscv_vlseg8};
    unsigned NF = Op->getNumValues() - 1;
    assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
    MVT VT = Op->getSimpleValueType(0);
    auto *Load = cast<MemIntrinsicSDNode>(Op);
        Load->getMemoryVT(), Load->getMemOperand());
    for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++) {
  case Intrinsic::riscv_sf_vc_v_x_se:
  case Intrinsic::riscv_sf_vc_v_i_se:
  case Intrinsic::riscv_sf_vc_v_xv_se:
  case Intrinsic::riscv_sf_vc_v_iv_se:
  case Intrinsic::riscv_sf_vc_v_vv_se:
  case Intrinsic::riscv_sf_vc_v_fv_se:
  case Intrinsic::riscv_sf_vc_v_xvv_se:
  case Intrinsic::riscv_sf_vc_v_ivv_se:
  case Intrinsic::riscv_sf_vc_v_vvv_se:
  case Intrinsic::riscv_sf_vc_v_fvv_se:
  case Intrinsic::riscv_sf_vc_v_xvw_se:
  case Intrinsic::riscv_sf_vc_v_ivw_se:
  case Intrinsic::riscv_sf_vc_v_vvw_se:
  case Intrinsic::riscv_sf_vc_v_fvw_se:
  unsigned IntNo = Op.getConstantOperandVal(1);
  case Intrinsic::riscv_seg2_store:
  case Intrinsic::riscv_seg3_store:
  case Intrinsic::riscv_seg4_store:
  case Intrinsic::riscv_seg5_store:
  case Intrinsic::riscv_seg6_store:
  case Intrinsic::riscv_seg7_store:
  case Intrinsic::riscv_seg8_store: {
        Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
        Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
        Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
        Intrinsic::riscv_vsseg8};
    // Operands are (chain, int_id, vec*, ptr, vl)
    assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
    MVT VT = Op->getOperand(2).getSimpleValueType();
    auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op);
    for (unsigned i = 0; i < NF; i++)
              ContainerVT, FixedIntrinsic->getOperand(2 + i), DAG, Subtarget),
        FixedIntrinsic->getChain(),
        FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
  case Intrinsic::riscv_sf_vc_xv_se:
  case Intrinsic::riscv_sf_vc_iv_se:
  case Intrinsic::riscv_sf_vc_vv_se:
  case Intrinsic::riscv_sf_vc_fv_se:
  case Intrinsic::riscv_sf_vc_xvv_se:
  case Intrinsic::riscv_sf_vc_ivv_se:
  case Intrinsic::riscv_sf_vc_vvv_se:
  case Intrinsic::riscv_sf_vc_fvv_se:
  case Intrinsic::riscv_sf_vc_xvw_se:
  case Intrinsic::riscv_sf_vc_ivw_se:
  case Intrinsic::riscv_sf_vc_vvw_se:
  case Intrinsic::riscv_sf_vc_fvw_se:
  switch (ISDOpcode) {
  case ISD::VP_REDUCE_ADD:
  case ISD::VP_REDUCE_UMAX:
  case ISD::VP_REDUCE_SMAX:
  case ISD::VP_REDUCE_UMIN:
  case ISD::VP_REDUCE_SMIN:
  case ISD::VP_REDUCE_AND:
  case ISD::VP_REDUCE_OR:
  case ISD::VP_REDUCE_XOR:
  case ISD::VP_REDUCE_FADD:
  case ISD::VP_REDUCE_SEQ_FADD:
  case ISD::VP_REDUCE_FMAX:
  case ISD::VP_REDUCE_FMAXIMUM:
  case ISD::VP_REDUCE_FMIN:
  case ISD::VP_REDUCE_FMINIMUM:
  SDValue Vec = Op.getOperand(IsVP ? 1 : 0);
          Op.getOpcode() == ISD::VP_REDUCE_AND ||
          Op.getOpcode() == ISD::VP_REDUCE_OR ||
          Op.getOpcode() == ISD::VP_REDUCE_XOR) &&
         "Unexpected reduction lowering");
  MVT ContainerVT = VecVT;
    VL = Op.getOperand(3);
    std::tie(Mask, VL) =
  switch (Op.getOpcode()) {
  case ISD::VP_REDUCE_AND: {
  case ISD::VP_REDUCE_OR:
  case ISD::VP_REDUCE_XOR: {
    // ((vcpop x) & 1) != 0
  // Now include the start value in the operation.
  // Note that we must return the start value when no elements are operated
  // upon. The vcpop instructions we've emitted in each case above will return
  // 0 for an inactive vector, and so we've already received the neutral value:
  // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
  // can simply include the start value.
  return DAG.getNode(BaseOpc, DL, Op.getValueType(), SetCC, Op.getOperand(0));
  auto *RegisterAVL = dyn_cast<RegisterSDNode>(AVL);
  auto *ImmAVL = dyn_cast<ConstantSDNode>(AVL);
  return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) ||
         (ImmAVL && ImmAVL->getZExtValue() >= 1);
/// Helper to lower a reduction sequence of the form:
///   scalar = reduce_op vec, scalar_start
  // The reduction needs an LMUL1 input; do the splat at either LMUL1
  // or the original VT if fractional.
  auto InnerVT = VecVT.bitsLE(M1VT) ? VecVT : M1VT;
  // We reuse the VL of the reduction to reduce vsetvli toggles if we can
  // prove it is non-zero. For the AVL=0 case, we need the scalar to
  // be the result of the reduction operation.
  auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(1, DL, XLenVT);
  if (M1VT != InnerVT)
  SDValue Ops[] = {PassThru, Vec, InitialValue, Mask, VL, Policy};
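  // For reference, an integer reduce-add with a start value is expected to
  // lower to roughly the following (illustrative registers):
  //   vmv.s.x    v9, a0        # start value into element 0
  //   vredsum.vs v9, v8, v9    # v9[0] = sum(v8[*]) + v9[0]
  //   vmv.x.s    a0, v9        # read the scalar result back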
  // Due to ordering in legalize types we may have a vector type that needs to
  // be split. Do that manually so we can get down to a legal type.
    VecEVT = Lo.getValueType();
  // TODO: The type may need to be widened rather than split. Or widened before
  MVT ContainerVT = VecVT;
                           Mask, VL, DL, DAG, Subtarget);
// Given a reduction op, this function returns the matching reduction opcode,
// the vector SDValue and the scalar SDValue required to lower this to a
static std::tuple<unsigned, SDValue, SDValue>
  auto Flags = Op->getFlags();
  unsigned Opcode = Op.getOpcode();
    // Use positive zero if we can. It is cheaper to materialize.
    return std::make_tuple(RVVOpc, Op.getOperand(0), Front);
  MVT VecEltVT = Op.getSimpleValueType();
  std::tie(RVVOpcode, VectorVal, ScalarVal) =
  MVT ContainerVT = VecVT;
  MVT ResVT = Op.getSimpleValueType();
                           VL, DL, DAG, Subtarget);
  if (Op->getFlags().hasNoNaNs())
  // Force output to NaN if any element is NaN.
      {VectorVal, VectorVal, DAG.getCondCode(ISD::SETNE),
       DAG.getUNDEF(Mask.getValueType()), Mask, VL});
                     DL, ResVT, NoNaNs, Res,
  unsigned Opc = Op.getOpcode();
  // TODO: The type may need to be widened rather than split. Or widened before
                     Vec, Mask, VL, DL, DAG, Subtarget);
  if ((Opc != ISD::VP_REDUCE_FMINIMUM && Opc != ISD::VP_REDUCE_FMAXIMUM) ||
      Op->getFlags().hasNoNaNs())
  // Check if any of the elements in Vec is NaN.
  // Check if the start value is NaN.
                     DL, ResVT, NoNaNs, Res,
  unsigned OrigIdx = Op.getConstantOperandVal(2);
  if (OrigIdx == 0 && Vec.isUndef())
  // We don't have the ability to slide mask vectors up indexed by their i1
  // elements; the smallest we can do is i8. Often we are able to bitcast to
  // equivalent i8 vectors. Note that when inserting a fixed-length vector
  // into a scalable one, we might not necessarily have enough scalable
  // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
    assert(OrigIdx % 8 == 0 && "Invalid index");
           "Unexpected mask vector lowering");
    // We can't slide this mask vector up indexed by its i1 elements.
    // This poses a problem when we wish to insert a scalable vector which
    // can't be re-expressed as a larger type. Just choose the slow path and
    // extend to a larger type, then truncate back down.
    // If the subvector vector is a fixed-length type and we don't know VLEN
    // exactly, we cannot use subregister manipulation to simplify the codegen; we
    // don't know which register of a LMUL group contains the specific subvector
    // as we only know the minimum register size. Therefore we must slide the
    // vector group up the full amount.
    MVT ContainerVT = VecVT;
                           DAG.getUNDEF(ContainerVT), SubVec,
    // Set the vector length to only the number of elements we care about. Note
    // that for slideup this includes the offset.
    // Use tail agnostic policy if we're inserting over Vec's tail.
    // If we're inserting into the lowest elements, use a tail undisturbed
    SubVec = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, SubVec,
                         SlideupAmt, Mask, VL, Policy);
  MVT ContainerVecVT = VecVT;
  MVT ContainerSubVecVT = SubVecVT;
  // insert_subvector scales the index by vscale if the subvector is scalable,
  // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
  // we have a fixed length subvector, we need to adjust the index by 1/vscale.
            ContainerVecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
    SubRegIdx = Decompose.first;
                         (OrigIdx % Vscale));
            ContainerVecVT, ContainerSubVecVT, OrigIdx, TRI);
    SubRegIdx = Decompose.first;
  bool ExactlyVecRegSized =
          .isKnownMultipleOf(Subtarget.expandVScale(VecRegSize));
  // 1. If the Idx has been completely eliminated and this subvector's size is
  // a vector register or a multiple thereof, or the surrounding elements are
  // undef, then this is a subvector insert which naturally aligns to a vector
  // register. These can easily be handled using subregister manipulation.
  // 2. If the subvector isn't an exact multiple of a valid register group size,
  // then the insertion must preserve the undisturbed elements of the register.
  // We do this by lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1
  // vector type (which resolves to a subregister copy), performing a VSLIDEUP
  // to place the subvector within the vector register, and an INSERT_SUBVECTOR
  // of that LMUL=1 type back into the larger vector (resolving to another
  // subregister operation). See below for how our VSLIDEUP works. We go via a
  // LMUL=1 type to avoid allocating a large register group to hold our
  // subvector.
  if (RemIdx.isZero() && (ExactlyVecRegSized || Vec.isUndef())) {
    // We may get NoSubRegister if inserting at index 0 and the subvec
    // container is the same as the vector, e.g. vec=v4i32,subvec=v4i32,idx=0
    if (SubRegIdx == RISCV::NoSubRegister) {
    // Use an insert_subvector that will resolve to an insert subreg.
  // VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements
  // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
  // (in our case undisturbed). This means we can set up a subvector insertion
  // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
  // size of the subvector.
  MVT InterSubVT = ContainerVecVT;
  // Extract a subvector equal to the nearest full vector register type. This
  // should resolve to a EXTRACT_SUBREG instruction.
  // Use tail agnostic policy if we're inserting over InterSubVT's tail.
  // If we're inserting into the lowest elements, use a tail undisturbed
  // Construct the vector length corresponding to RemIdx + length(SubVecVT).
  SubVec = getVSlideup(DAG, Subtarget, DL, InterSubVT, AlignedExtract, SubVec,
                       SlideupAmt, Mask, VL, Policy);
  // If required, insert this subvector back into the correct vector register.
  // This should resolve to an INSERT_SUBREG instruction.
  if (ContainerVecVT.bitsGT(InterSubVT))
  // We might have bitcast from a mask type: cast back to the original type if
  // necessary.
  return DAG.getBitcast(Op.getSimpleValueType(), SubVec);
  MVT SubVecVT = Op.getSimpleValueType();
  unsigned OrigIdx = Op.getConstantOperandVal(1);
  // With an index of 0 this is a cast-like subvector, which can be performed
  // with subregister operations.
  // We don't have the ability to slide mask vectors down indexed by their i1
  // elements; the smallest we can do is i8. Often we are able to bitcast to
  // equivalent i8 vectors. Note that when extracting a fixed-length vector
  // from a scalable one, we might not necessarily have enough scalable
  // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
    assert(OrigIdx % 8 == 0 && "Invalid index");
           "Unexpected mask vector lowering");
    // We can't slide this mask vector down, indexed by its i1 elements.
    // This poses a problem when we wish to extract a scalable vector which
    // can't be re-expressed as a larger type. Just choose the slow path and
    // extend to a larger type, then truncate back down.
    // TODO: We could probably improve this when extracting certain fixed
    // from fixed, where we can extract as i8 and shift the correct element
    // right to reach the desired subvector?
  // If the subvector vector is a fixed-length type and we don't know VLEN
  // exactly, we cannot use subregister manipulation to simplify the codegen; we
  // don't know which register of a LMUL group contains the specific subvector
  // as we only know the minimum register size. Therefore we must slide the
  // vector group down the full amount.
    MVT ContainerVT = VecVT;
    // Shrink down Vec so we're performing the slidedown on a smaller LMUL.
      ContainerVT = *ShrunkVT;
    // Set the vector length to only the number of elements we care about. This
    // avoids sliding down elements we're going to discard straight away.
                      DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
    // Now we can use a cast-like subvector extract to get the result.
  MVT ContainerSubVecVT = SubVecVT;
  // extract_subvector scales the index by vscale if the subvector is scalable,
  // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
  // we have a fixed length subvector, we need to adjust the index by 1/vscale.
            VecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
    SubRegIdx = Decompose.first;
                         (OrigIdx % Vscale));
            VecVT, ContainerSubVecVT, OrigIdx, TRI);
    SubRegIdx = Decompose.first;
  // If the Idx has been completely eliminated then this is a subvector extract
  // which naturally aligns to a vector register. These can easily be handled
  // using subregister manipulation. We use an extract_subvector that will
  // resolve to an extract subreg.
  // Else SubVecVT is M1 or smaller and may need to be slid down: if SubVecVT
  // was > M1 then the index would need to be a multiple of VLMAX, and so would
  // If the vector type is an LMUL-group type, extract a subvector equal to the
  // nearest full vector register type.
  MVT InterSubVT = VecVT;
    // If VecVT has an LMUL > 1, then SubVecVT should have a smaller LMUL, and
    // we should have successfully decomposed the extract into a subregister.
    // We use an extract_subvector that will resolve to a subreg extract.
    assert(SubRegIdx != RISCV::NoSubRegister);
  // Slide this vector register down by the desired number of elements in order
  // to place the desired subvector starting at element 0.
                   Vec, SlidedownAmt, Mask, VL);
  // Now the vector is in the right position, extract our final subvector. This
  // should resolve to a COPY.
  // We might have bitcast from a mask type: cast back to the original type if
  // necessary.
  return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
// Widen a vector's operands to i8, then truncate its results back to the
// original type, typically i1. All operand and result types must be the same.
  MVT VT = N.getSimpleValueType();
  assert(Op.getSimpleValueType() == VT &&
         "Operands and result must be same type");
  unsigned NumVals = N->getNumValues();
      NumVals, N.getValueType().changeVectorElementType(MVT::i8)));
  for (unsigned I = 0; I < NumVals; I++) {
  if (TruncVals.size() > 1)
  return TruncVals.front();
10975MVT VecVT =
Op.getSimpleValueType();
10978"vector_interleave on non-scalable vector!");
10980// 1 bit element vectors need to be widened to e8 10984// If the VT is LMUL=8, we need to split and reassemble. 10989EVT SplitVT = Op0Lo.getValueType();
10992 DAG.
getVTList(SplitVT, SplitVT), Op0Lo, Op0Hi);
10994 DAG.
getVTList(SplitVT, SplitVT), Op1Lo, Op1Hi);
11003// Concatenate the two vectors as one vector to deinterleave 11008Op.getOperand(0),
Op.getOperand(1));
// We can deinterleave through vnsrl.wi if the element type is smaller than
// ELEN.
// For the indices, use the vmv.v.x of an i8 constant to fill the largest
// possible mask vector, then extract the required subvector. Doing this
// (instead of a vid, vmsne sequence) reduces LMUL, and allows the mask
// creation to be rematerialized during register allocation to reduce
// register pressure if needed.
EvenSplat = DAG.
getBitcast(MVT::nxv64i1, EvenSplat);
11032 OddSplat = DAG.
getBitcast(MVT::nxv64i1, OddSplat);
11036// vcompress the even and odd elements into two separate vectors 11038 EvenMask, DAG.
getUNDEF(ConcatVT));
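// (Conceptually, the even mask over the concatenated vector is 1,0,1,0,... and
// the odd mask is 0,1,0,1,..., so each vcompress packs one parity class of
// elements to the front of its result.)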
11042// Extract the result half of the gather for even and odd 11054MVT VecVT =
Op.getSimpleValueType();
11057"vector_interleave on non-scalable vector!");
11059// i1 vectors need to be widened to i8 11066// If the VT is LMUL=8, we need to split and reassemble. 11070EVT SplitVT = Op0Lo.getValueType();
11073 DAG.
getVTList(SplitVT, SplitVT), Op0Lo, Op1Lo);
11075 DAG.
getVTList(SplitVT, SplitVT), Op0Hi, Op1Hi);
// If the element type is smaller than ELEN, then we can interleave with
// vwaddu.vv and vwmaccu.vx.
// Otherwise, fall back to using vrgatherei16.vv.
Op.getOperand(0),
Op.getOperand(1));
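// For the widening path mentioned above, the interleave can be formed
// arithmetically (illustrative sketch): Wide = vwaddu.vv(Even, Odd) followed
// by vwmaccu.vx(Wide, (2^SEW)-1, Odd) yields Even + Odd * 2^SEW, which, when
// reinterpreted at the original (narrower) SEW, is exactly the interleaved
// vector.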
// 0 1 2 3 4 5 6 7 ...
// 1 1 1 1 1 1 1 1 ...
// 1 0 1 0 1 0 1 0 ...
// Build up the index vector for interleaving the concatenated vector:
// 0 0 1 1 2 2 3 3 ...
// 0 n 1 n+1 2 n+2 3 n+3 ...
// Then perform the interleave:
// v[0] v[n] v[1] v[n+1] v[2] v[n+2] v[3] v[n+3] ...
// Extract the two halves from the interleaved result.
// Lower step_vector to the vid instruction. Any non-identity step value must
// be accounted for by manual expansion.
MVT VT =
Op.getSimpleValueType();
11150uint64_t StepValImm =
Op.getConstantOperandVal(0);
11151if (StepValImm != 1) {
11160 VL, VT,
DL, DAG, Subtarget);
11167// Implement vector_reverse using vrgather.vv with indices determined by 11168// subtracting the id of each element from (VLMAX-1). This will convert 11169// the indices like so: 11170// (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0). 11171// TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16. 11175MVT VecVT =
Op.getSimpleValueType();
11184MVT ContainerVT = VecVT;
11194// On some uarchs vrgather.vv will read from every input register for each 11195// output register, regardless of the indices. However to reverse a vector 11196// each output register only needs to read from one register. So decompose it 11197// into LMUL * M1 vrgather.vvs, so we get O(LMUL) performance instead of 11200// vsetvli a1, zero, e64, m4, ta, ma 11201// vrgatherei16.vv v12, v8, v16 11203// vsetvli a1, zero, e64, m1, ta, ma 11204// vrgather.vv v15, v8, v16 11205// vrgather.vv v14, v9, v16 11206// vrgather.vv v13, v10, v16 11207// vrgather.vv v12, v11, v16 11215// Fixed length vectors might not fit exactly into their container, and so 11216// leave a gap in the front of the vector after being reversed. Slide this 11219// x x x x 3 2 1 0 <- v4i16 @ vlen=128 11220// 0 1 2 3 x x x x <- reverse 11221// x x x x 0 1 2 3 <- vslidedown.vx 11246// If this is SEW=8 and VLMAX is potentially more than 256, we need 11247// to use vrgatherei16.vv. 11248if (MaxVLMAX > 256 && EltSize == 8) {
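// (An i8 index can only address 256 distinct source elements, which is why
// SEW=8 gathers with a potential VLMAX above 256 must switch to 16-bit
// indices via vrgatherei16.vv.)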
// If this is LMUL=8, we have to split before we can use vrgatherei16.vv.
// Reverse each half, then reassemble them in reverse order.
// NOTE: It's also possible that after splitting, VLMAX no longer
// requires vrgatherei16.vv.
// Reassemble the low and high pieces reversed.
// FIXME: This is a CONCAT_VECTORS.
// Just promote the int type to i16 which will double the LMUL.
// At LMUL > 1, do the index computation in 16 bits to reduce register
// pressure.
assert(isUInt<16>(MaxVLMAX - 1));
// Largest VLMAX is 65536 @ zvl65536b 11282// Calculate VLMAX-1 for the desired SEW. 11288// Splat VLMAX-1 taking care to handle SEW==64 on RV32. 11303 DAG.
getUNDEF(ContainerVT), Mask, VL);
11315MVT VecVT =
Op.getSimpleValueType();
11319 int64_t ImmValue = cast<ConstantSDNode>(
Op.getOperand(2))->getSExtValue();
11321if (ImmValue >= 0) {
// The operand is a TargetConstant, we need to rebuild it as a regular
// constant.
// The operand is a TargetConstant, we need to rebuild it as a regular
// constant rather than negating the original operand.
DownOffset, TrueMask, UpOffset);
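// Conceptually (illustrative), splice(V1, V2, Imm) becomes a vslidedown of V1
// by Imm followed by a vslideup of V2 by VLMAX-Imm, e.g.
//   splice(v1, v2, 2) -> vslideup(vslidedown(v1, 2), v2, VLMAX-2)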
11338returngetVSlideup(DAG, Subtarget,
DL, VecVT, SlideDown, V2, UpOffset,
11344RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(
SDValueOp,
11347auto *
Load = cast<LoadSDNode>(
Op);
11350Load->getMemoryVT(),
11351 *
Load->getMemOperand()) &&
11352"Expecting a correctly-aligned load");
11354MVT VT =
Op.getSimpleValueType();
11358// If we know the exact VLEN and our fixed length vector completely fills 11359// the container, use a whole register load instead. 11360constauto [MinVLMAX, MaxVLMAX] =
11363getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
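// e.g. (illustrative) if VLEN is known to be exactly 128, a v4i32 fixed-length
// vector fills a whole LMUL=1 register, so an ordinary load of the container
// type is enough and no VL bookkeeping is required.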
11377 IsMaskOp ? Intrinsic::riscv_vlm : Intrinsic::riscv_vle,
DL, XLenVT);
11380 Ops.push_back(DAG.
getUNDEF(ContainerVT));
11381 Ops.push_back(
Load->getBasePtr());
11386Load->getMemoryVT(),
Load->getMemOperand());
11393RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(
SDValueOp,
11396auto *
Store = cast<StoreSDNode>(
Op);
11399Store->getMemoryVT(),
11400 *
Store->getMemOperand()) &&
11401"Expecting a correctly-aligned store");
// If the size is less than a byte, we need to pad with zeros to make a byte.
// If we know the exact VLEN and our fixed length vector completely fills
// the container, use a whole register store instead.
const auto [MinVLMAX, MaxVLMAX] =
11425getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
11436 IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse,
DL, XLenVT);
11439 {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL},
11440Store->getMemoryVT(),
Store->getMemOperand());
11446MVT VT =
Op.getSimpleValueType();
11448constauto *MemSD = cast<MemSDNode>(
Op);
11449EVT MemVT = MemSD->getMemoryVT();
11451SDValue Chain = MemSD->getChain();
11455bool IsExpandingLoad =
false;
11456if (
constauto *VPLoad = dyn_cast<VPLoadSDNode>(
Op)) {
11457Mask = VPLoad->getMask();
11459 VL = VPLoad->getVectorLength();
11461constauto *MLoad = cast<MaskedLoadSDNode>(
Op);
11462Mask = MLoad->getMask();
11463 PassThru = MLoad->getPassThru();
11464 IsExpandingLoad = MLoad->isExpandingLoad();
11471MVT ContainerVT = VT;
11485if (!IsUnmasked && IsExpandingLoad) {
11492unsigned IntID = IsUnmasked || IsExpandingLoad ? Intrinsic::riscv_vle
11493 : Intrinsic::riscv_vle_mask;
11495if (IntID == Intrinsic::riscv_vle)
11496 Ops.push_back(DAG.
getUNDEF(ContainerVT));
11498 Ops.push_back(PassThru);
11499 Ops.push_back(BasePtr);
11500if (IntID == Intrinsic::riscv_vle_mask)
11501 Ops.push_back(Mask);
11503if (IntID == Intrinsic::riscv_vle_mask)
11510 Chain =
Result.getValue(1);
11512MVT IndexVT = ContainerVT;
11517bool UseVRGATHEREI16 =
false;
// If the index vector is an i8 vector and the element count exceeds 256, we
// should change the element type of the index vector to i16 to avoid
// overflow.
// FIXME: We need to do vector splitting manually for LMUL=8 cases.
UseVRGATHEREI16 =
true;
11531 DAG.
getUNDEF(IndexVT), Mask, ExpandingVL);
11535DL, ContainerVT, Result, Iota, PassThru, Mask, ExpandingVL);
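// (For an expanding load, the active elements are loaded contiguously and then
// scattered to their expanded positions: viota.m over the mask yields, for
// each active lane, the index of its source element, and a masked vrgather -
// or vrgatherei16 for large element counts - places it. Illustrative
// description of the sequence built above.)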
11548constauto *MemSD = cast<MemSDNode>(
Op);
11549EVT MemVT = MemSD->getMemoryVT();
11551SDValue Chain = MemSD->getChain();
11555bool IsCompressingStore =
false;
11556if (
constauto *VPStore = dyn_cast<VPStoreSDNode>(
Op)) {
11557 Val = VPStore->getValue();
11558Mask = VPStore->getMask();
11559 VL = VPStore->getVectorLength();
11561constauto *MStore = cast<MaskedStoreSDNode>(
Op);
11562 Val = MStore->getValue();
11563Mask = MStore->getMask();
11564 IsCompressingStore = MStore->isCompressingStore();
11573MVT ContainerVT = VT;
11578if (!IsUnmasked || IsCompressingStore) {
11587if (IsCompressingStore) {
11590 DAG.
getUNDEF(ContainerVT), Val, Mask, VL);
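// (A compressing store first packs the active elements to the front of the
// register group with the vcompress above, and then stores that contiguous
// prefix; illustrative description of the sequence being built here.)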
11597 IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
11599 Ops.push_back(Val);
11600 Ops.push_back(BasePtr);
11602 Ops.push_back(Mask);
11606 DAG.
getVTList(MVT::Other), Ops, MemVT, MMO);
11618MVT ContainerVT = VT;
11631 Passthru, Val, Mask, VL);
11640RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(
SDValueOp,
11642MVT InVT =
Op.getOperand(0).getSimpleValueType();
11645MVT VT =
Op.getSimpleValueType();
11659 {Op1, Op2,
Op.getOperand(2), DAG.
getUNDEF(MaskVT), Mask, VL});
11666unsigned Opc =
Op.getOpcode();
11673MVT VT =
Op.getSimpleValueType();
// RVV VMFEQ/VMFNE ignore qNaN, so we expand strict_fsetccs with OEQ/UNE.
// Expand strict_fsetccs(x, oeq) to
// (and strict_fsetccs(x, y, oge), strict_fsetccs(x, y, ole))
// Tmp1 and Tmp2 might be the same node.
// Expand (strict_fsetccs x, y, une) to (not (strict_fsetccs x, y, oeq))
MVT ContainerInVT = InVT;
// VMFLT/VMFLE/VMFGT/VMFGE raise an exception for qNaN. Generate a mask that is
// only active when both input elements are ordered.
{Chain, Op1, Op1, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
11729 {Chain, Op2, Op2, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
11733// Use Mask as the passthru operand to let the result be 0 if either of the 11734// inputs is unordered. 11737 {Chain, Op1, Op2, CC, Mask, Mask, VL});
11742 {Chain, Op1, Op2, CC, DAG.getUNDEF(MaskVT), Mask, VL});
11752// Lower vector ABS to smax(X, sub(0, X)). 11755MVT VT =
Op.getSimpleValueType();
11759"Unexpected type for ISD::ABS");
11761MVT ContainerVT = VT;
11768if (
Op->getOpcode() == ISD::VP_ABS) {
11773 VL =
Op->getOperand(2);
11781 DAG.
getUNDEF(ContainerVT), Mask, VL);
11783 DAG.
getUNDEF(ContainerVT), Mask, VL);
11790SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV(
11793MVT VT =
Op.getSimpleValueType();
11797"Can only handle COPYSIGN with matching types.");
11806 Sign, DAG.
getUNDEF(ContainerVT), Mask, VL);
11811SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
11813MVT VT =
Op.getSimpleValueType();
11830 Op2, DAG.
getUNDEF(ContainerVT), VL);
11841MVT VT =
Op.getSimpleValueType();
11844// Create list of operands by converting existing ones to scalable types. 11846for (
constSDValue &V :
Op->op_values()) {
11847assert(!isa<VTSDNode>(V) &&
"Unexpected VTSDNode node!");
11849// Pass through non-vector operands. 11850if (!
V.getValueType().isVector()) {
11855// "cast" fixed length vector to a scalable vector. 11856assert(useRVVForFixedLengthVectorVT(
V.getSimpleValueType()) &&
11857"Only fixed length vectors are supported!");
11869// StrictFP operations have two result values. Their lowered result should 11870// have same result count. 11871if (
Op->isStrictFPOpcode()) {
11880 DAG.
getNode(NewOpc,
DL, ContainerVT, Ops,
Op->getFlags());
11884// Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node: 11885// * Operands of each node are assumed to be in the same order. 11886// * The EVL operand is promoted from i32 to i64 on RV64. 11887// * Fixed-length vectors are converted to their scalable-vector container 11894MVT VT =
Op.getSimpleValueType();
11897MVT ContainerVT = VT;
11903assert(!isa<VTSDNode>(V) &&
"Unexpected VTSDNode node!");
11904// Add dummy passthru value before the mask. Or if there isn't a mask, 11906if (HasPassthruOp) {
11909if (*MaskIdx == OpIdx.index())
11913if (
Op.getOpcode() == ISD::VP_MERGE) {
11914// For VP_MERGE, copy the false operand instead of an undef value. 11917assert(
Op.getOpcode() == ISD::VP_SELECT);
11918// For VP_SELECT, add an undef value. 11923// VFCVT_RM_X_F_VL requires a rounding mode to be injected before the VL. 11928// Pass through operands which aren't fixed-length vectors. 11929if (!
V.getValueType().isFixedLengthVector()) {
11933// "cast" fixed length vector to a scalable vector. 11934MVT OpVT =
V.getSimpleValueType();
11936assert(useRVVForFixedLengthVectorVT(OpVT) &&
11937"Only fixed length vectors are supported!");
11942return DAG.
getNode(RISCVISDOpc,
DL, VT, Ops,
Op->getFlags());
11952MVT VT =
Op.getSimpleValueType();
11955// NOTE: Mask is dropped. 11958MVT ContainerVT = VT;
11968 DAG.
getUNDEF(ContainerVT), Zero, VL);
11971Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1,
DL, XLenVT);
11973 DAG.
getUNDEF(ContainerVT), SplatValue, VL);
11976 ZeroSplat, DAG.
getUNDEF(ContainerVT), VL);
11985MVT VT =
Op.getSimpleValueType();
11989ISD::CondCode Condition = cast<CondCodeSDNode>(
Op.getOperand(2))->get();
11990// NOTE: Mask is dropped. 11993MVT ContainerVT = VT;
12003switch (Condition) {
12010// X == Y --> ~(X^Y) 12018// X >s Y --> X == 0 & Y == 1 --> ~X & Y 12019// X <u Y --> X == 0 & Y == 1 --> ~X & Y 12027// X <s Y --> X == 1 & Y == 0 --> ~Y & X 12028// X >u Y --> X == 1 & Y == 0 --> ~Y & X 12036// X >=s Y --> X == 0 | Y == 1 --> ~X | Y 12037// X <=u Y --> X == 0 | Y == 1 --> ~X | Y 12045// X <=s Y --> X == 1 | Y == 0 --> ~Y | X 12046// X >=u Y --> X == 1 | Y == 0 --> ~Y | X 12061// Lower Floating-Point/Integer Type-Convert VP SDNodes 12071MVT DstVT =
Op.getSimpleValueType();
12072MVT SrcVT = Src.getSimpleValueType();
12085if (DstEltSize >= SrcEltSize) {
// Single-width and widening conversion. 12093// Do we need to do any pre-widening before converting? 12094if (SrcEltSize == 1) {
12105 ZeroSplat, DAG.
getUNDEF(IntVT), VL);
12106 }
elseif (DstEltSize > (2 * SrcEltSize)) {
12107// Widen before converting. 12110 Src = DAG.
getNode(RISCVISDExtOpc,
DL, IntVT, Src, Mask, VL);
12116"Wrong input/output vector types");
12118// Convert f16 to f32 then convert f32 to i64. 12119if (DstEltSize > (2 * SrcEltSize)) {
12129 }
else {
// Narrowing + Conversion 12132// First do a narrowing convert to an FP type half the size, then round 12133// the FP type to a small FP type if needed. 12135MVT InterimFVT = DstVT;
12136if (SrcEltSize > (2 * DstEltSize)) {
12137assert(SrcEltSize == (4 * DstEltSize) &&
"Unexpected types!");
12144if (InterimFVT != DstVT) {
12150"Wrong input/output vector types");
12151// First do a narrowing conversion to an integer half the size, then 12152// truncate if needed. 12154if (DstEltSize == 1) {
12155// First convert to the same size integer, then convert to mask using 12157assert(SrcEltSize >= 16 &&
"Unexpected FP type!");
12162// Compare the integer result to 0. The integer should be 0 or 1/-1, 12163// otherwise the conversion was undefined. 12167 DAG.
getUNDEF(InterimIVT), SplatZero, VL);
12177while (InterimIVT != DstVT) {
12189MVT VT =
Op.getSimpleValueType();
12198MVT VT =
Op.getSimpleValueType();
12206// Use default legalization if a vector of EVL type would be legal. 12212MVT ContainerVT = VT;
12220// Promote to a vector of i8. 12223// Promote TrueVal and FalseVal using VLMax. 12224// FIXME: Is there a better way to do this? 12233 SplatZero, DAG.
getUNDEF(PromotedVT), VL);
12234// Any element past VL uses FalseVal, so use VLMax 12236 SplatOne, SplatZero, DAG.
getUNDEF(PromotedVT), VLMax);
12238// VP_MERGE the two promoted values. 12240 TrueVal, FalseVal, FalseVal, VL);
12242// Convert back to mask. 12255RISCVTargetLowering::lowerVPSpliceExperimental(
SDValueOp,
12267MVT VT =
Op.getSimpleValueType();
12268MVT ContainerVT = VT;
12281// Expand input operands 12289 SplatZeroOp1, DAG.
getUNDEF(ContainerVT), EVL1);
12298 SplatZeroOp2, DAG.
getUNDEF(ContainerVT), EVL2);
12301 int64_t ImmValue = cast<ConstantSDNode>(
Offset)->getSExtValue();
12303if (ImmValue >= 0) {
12304// The operand is a TargetConstant, we need to rebuild it as a regular 12309// The operand is a TargetConstant, we need to rebuild it as a regular 12310// constant rather than negating the original operand. 12317 Op1, DownOffset, Mask, UpOffset);
12322// Truncate Result back to a mask vector (Result has same EVL as Op2) 12325 {Result, DAG.getConstant(0, DL, ContainerVT),
12326 DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(getMaskTypeFor(ContainerVT)),
12341MVT VT =
Op.getSimpleValueType();
12343MVT ContainerVT = VT;
12359RISCVTargetLowering::lowerVPReverseExperimental(
SDValueOp,
12362MVT VT =
Op.getSimpleValueType();
12369MVT ContainerVT = VT;
12377MVT GatherVT = ContainerVT;
12379// Check if we are working with mask vectors 12384// Expand input operand 12392 SplatZero, DAG.
getUNDEF(IndicesVT), EVL);
12402// If this is SEW=8 and VLMAX is unknown or more than 256, we need 12403// to use vrgatherei16.vv. 12404// TODO: It's also possible to use vrgatherei16.vv for other types to 12405// decrease register width for the index calculation. 12406// NOTE: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16. 12407if (MaxVLMAX > 256 && EltSize == 8) {
12408// If this is LMUL=8, we have to split before using vrgatherei16.vv. 12409// Split the vector in half and reverse each half using a full register 12411// Swap the halves and concatenate them. 12412// Slide the concatenated result by (VLMax - VL). 12420// Reassemble the low and high pieces reversed. 12421// NOTE: this Result is unmasked (because we do not need masks for 12422// shuffles). If in the future this has to change, we can use a SELECT_VL 12423// between Result and UNDEF using the mask originally passed to VP_REVERSE 12427// Slide off any elements from past EVL that were reversed into the low 12435 DAG.
getUNDEF(GatherVT), Result, Diff, Mask, EVL);
12438// Truncate Result back to a mask vector 12451// Just promote the int type to i16 which will double the LMUL. 12460 DAG.
getUNDEF(IndicesVT), VecLen, EVL);
12462 DAG.
getUNDEF(IndicesVT), Mask, EVL);
12464 DAG.
getUNDEF(GatherVT), Mask, EVL);
12467// Truncate Result back to a mask vector 12481MVT VT =
Op.getSimpleValueType();
12483return lowerVPOp(
Op, DAG);
12485// It is safe to drop mask parameter as masked-off elements are undef. 12490MVT ContainerVT = VT;
12509MVT VT =
Op.getSimpleValueType();
12510MVT ContainerVT = VT;
12516auto *VPNode = cast<VPStridedLoadSDNode>(
Op);
12517// Check if the mask is known to be all ones 12522 : Intrinsic::riscv_vlse_mask,
12525 DAG.
getUNDEF(ContainerVT), VPNode->getBasePtr(),
12526 VPNode->getStride()};
12534 Ops.
push_back(VPNode->getVectorLength());
12542 VPNode->getMemoryVT(), VPNode->getMemOperand());
12556auto *VPNode = cast<VPStridedStoreSDNode>(
Op);
12557SDValue StoreVal = VPNode->getValue();
12559MVT ContainerVT = VT;
12565// Check if the mask is known to be all ones 12570 : Intrinsic::riscv_vsse_mask,
12573 VPNode->getBasePtr(), VPNode->getStride()};
12581 Ops.
push_back(VPNode->getVectorLength());
12584 Ops, VPNode->getMemoryVT(),
12585 VPNode->getMemOperand());
// Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
// matched to a RVV indexed load. The RVV indexed load instructions only
// support the "unsigned unscaled" addressing mode; indices are implicitly
// zero-extended or truncated to XLEN and are treated as byte offsets. Any
// signed or scaled indexing is extended to the XLEN value type and scaled
// accordingly.
MVT VT =
Op.getSimpleValueType();
12599constauto *MemSD = cast<MemSDNode>(
Op.getNode());
12600EVT MemVT = MemSD->getMemoryVT();
12602SDValue Chain = MemSD->getChain();
12608if (
auto *VPGN = dyn_cast<VPGatherSDNode>(
Op.getNode())) {
12609Index = VPGN->getIndex();
12610Mask = VPGN->getMask();
12612 VL = VPGN->getVectorLength();
12613// VP doesn't support extending loads. 12616// Else it must be a MGATHER. 12617auto *MGN = cast<MaskedGatherSDNode>(
Op.getNode());
12618Index = MGN->getIndex();
12619Mask = MGN->getMask();
12620 PassThru = MGN->getPassThru();
12624MVT IndexVT =
Index.getSimpleValueType();
12629assert(
BasePtr.getSimpleValueType() == XLenVT &&
"Unexpected pointer type");
12630// Targets have to explicitly opt-in for extending vector loads. 12632"Unexpected extending MGATHER/VP_GATHER");
12634// If the mask is known to be all ones, optimize to an unmasked intrinsic; 12635// the selection of the masked intrinsics doesn't do this for us. 12638MVT ContainerVT = VT;
12662 IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
12679 Chain =
Result.getValue(1);
// Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then
// be matched to a RVV indexed store. The RVV indexed store instructions only
// support the "unsigned unscaled" addressing mode; indices are implicitly
// zero-extended or truncated to XLEN and are treated as byte offsets. Any
// signed or scaled indexing is extended to the XLEN value type and scaled
// accordingly.
const auto *MemSD = cast<MemSDNode>(
Op.getNode());
12697EVT MemVT = MemSD->getMemoryVT();
12699SDValue Chain = MemSD->getChain();
12702 [[maybe_unused]]
bool IsTruncatingStore =
false;
12705if (
auto *VPSN = dyn_cast<VPScatterSDNode>(
Op.getNode())) {
12706Index = VPSN->getIndex();
12707Mask = VPSN->getMask();
12708 Val = VPSN->getValue();
12709 VL = VPSN->getVectorLength();
12710// VP doesn't support truncating stores. 12711 IsTruncatingStore =
false;
12713// Else it must be a MSCATTER. 12714auto *MSN = cast<MaskedScatterSDNode>(
Op.getNode());
12715Index = MSN->getIndex();
12716Mask = MSN->getMask();
12717 Val = MSN->getValue();
12718 IsTruncatingStore = MSN->isTruncatingStore();
12722MVT IndexVT =
Index.getSimpleValueType();
12727assert(
BasePtr.getSimpleValueType() == XLenVT &&
"Unexpected pointer type");
12728// Targets have to explicitly opt-in for extending vector loads and 12729// truncating vector stores. 12730assert(!IsTruncatingStore &&
"Unexpected truncating MSCATTER/VP_SCATTER");
12732// If the mask is known to be all ones, optimize to an unmasked intrinsic; 12733// the selection of the masked intrinsics doesn't do this for us. 12736MVT ContainerVT = VT;
12760 IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
12770 DAG.
getVTList(MVT::Other), Ops, MemVT, MMO);
12782// Encoding used for rounding mode in RISC-V differs from that used in 12783// FLT_ROUNDS. To convert it the RISC-V rounding mode is used as an index in a 12784// table, which consists of a sequence of 4-bit fields, each representing 12785// corresponding FLT_ROUNDS mode. 12786staticconstint Table =
12811// Encoding used for rounding mode in RISC-V differs from that used in 12812// FLT_ROUNDS. To convert it the C rounding mode is used as an index in 12813// a table, which consists of a sequence of 4-bit fields, each representing 12814// corresponding RISC-V mode. 12815staticconstunsigned Table =
12838bool isRISCV64 = Subtarget.
is64Bit();
12845// Returns the opcode of the target-specific SDNode that implements the 32-bit 12846// form of the given Opcode. 12870// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG 12871// node. Because i8/i16/i32 isn't a legal type for RV64, these operations would 12872// otherwise be promoted to i64, making it difficult to select the 12873// SLLW/DIVUW/.../*W later one because the fact the operation was originally of 12874// type i8/i16/i32 is lost. 12882// ReplaceNodeResults requires we maintain the same type for the return value. 12886// Converts the given 32-bit operation to a i64 operation with signed extension 12887// semantic to reduce the signed extension instructions. 12902switch (
N->getOpcode()) {
12904llvm_unreachable(
"Don't know how to custom type legalize this operation!");
12910"Unexpected custom legalisation");
12911bool IsStrict =
N->isStrictFPOpcode();
12914SDValue Op0 = IsStrict ?
N->getOperand(1) :
N->getOperand(0);
// In absence of Zfh, promote f16 to f32, then convert.
Opc,
DL, VTs, Chain, Op0,
// For bf16, or f16 in absence of Zfh, promote [b]f16 to f32 and then
// If the FP type needs to be softened, emit a library call using the 'si'
// version. If we left it to default legalization we'd end up with 'di'. If
// the FP type doesn't need to be softened just let generic type
// legalization promote the result type.
std::tie(Result, Chain) =
12967makeLibCall(DAG, LC,
N->getValueType(0), Op0, CallOptions,
DL, Chain);
// In absence of Zfh, promote f16 to f32, then convert.
// If the FP type needs to be softened, emit a library call to lround. We'll
// need to truncate the result. We assume any value that doesn't fit in i32
// is allowed to return an unspecified value.
Op0.
getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
13006assert(!Subtarget.
is64Bit() &&
"READCYCLECOUNTER/READSTEADYCOUNTER only " 13007"has custom type legalization on riscv32");
13020N->getOperand(0), LoCounter, HiCounter);
13031// Use a SEXTLOAD instead of the default EXTLOAD. Similar to the 13032// sext_inreg we emit for ADD/SUB/MUL/SLLI. 13044unsignedSize =
N->getSimpleValueType(0).getSizeInBits();
13045unsigned XLen = Subtarget.
getXLen();
13046// This multiply needs to be expanded, try to use MULHSU+MUL if possible. 13048assert(
Size == (XLen * 2) &&
"Unexpected custom legalisation");
13055// We need exactly one side to be unsigned. 13056if (LHSIsU == RHSIsU)
13071// The other operand should be signed, but still prefer MULH when 13073if (RHSIsU && LHSIsS && !RHSIsS)
13075elseif (LHSIsU && RHSIsS && !LHSIsS)
13085"Unexpected custom legalisation");
13092"Unexpected custom legalisation");
13094// If we can use a BSET instruction, allow default promotion to apply. 13095if (
N->getOpcode() ==
ISD::SHL && Subtarget.hasStdExtZbs() &&
13102// Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is 13103// similar to customLegalizeToWOpWithSExt, but we must zero_extend the 13121"Unexpected custom legalisation");
13122assert((Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
13123 Subtarget.hasVendorXTHeadBb()) &&
13124"Unexpected custom legalization");
13125if (!isa<ConstantSDNode>(
N->getOperand(1)) &&
13126 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()))
13135"Unexpected custom legalisation");
13149MVT VT =
N->getSimpleValueType(0);
13150assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
13151 Subtarget.
is64Bit() && Subtarget.hasStdExtM() &&
13152"Unexpected custom legalisation");
13153// Don't promote division/remainder by constant since we should expand those 13154// to multiply by magic constant. 13160// If the input is i32, use ANY_EXTEND since the W instructions don't read 13161// the upper 32 bits. For other types we need to sign or zero extend 13162// based on the opcode. 13173"Unexpected custom legalisation");
13175// If the RHS is a constant, we can simplify ConditionRHS below. Otherwise 13176// use the default legalization. 13177if (!isa<ConstantSDNode>(
N->getOperand(1)))
13188// For an addition, the result should be less than one of the operands (LHS) 13189// if and only if the other operand (RHS) is negative, otherwise there will 13191// For a subtraction, the result should be less than one of the operands 13192// (LHS) if and only if the other operand (RHS) is (non-zero) positive, 13193// otherwise there will be overflow. 13194EVT OType =
N->getValueType(1);
13207"Unexpected custom legalisation");
13209// Create an ADDW or SUBW. 13219// Special case uaddo X, 1 overflowed if the addition result is 0. 13220// The general case (X + C) < C is not necessarily beneficial. Although we 13221// reduce the live range of X, we may introduce the materialization of 13222// constant C, especially when the setcc result is used by branch. We have 13223// no compare with constant and branch instructions. 13224 Overflow = DAG.
getSetCC(
DL,
N->getValueType(1), Res,
13227// Special case uaddo X, -1 overflowed if X != 0. 13228 Overflow = DAG.
getSetCC(
DL,
N->getValueType(1),
N->getOperand(0),
13231// Sign extend the LHS and perform an unsigned compare with the ADDW 13232// result. Since the inputs are sign extended from i32, this is equivalent 13233// to comparing the lower 32 bits. 13246 !Subtarget.hasStdExtZbb() &&
"Unexpected custom legalisation");
13247// Without Zbb, expand to UADDO/USUBO+select which will trigger our custom 13248// promotion for UADDO/USUBO. 13255"Unexpected custom legalisation");
13261"Unexpected custom legalisation");
13263if (Subtarget.hasStdExtZbb()) {
// Emit a special ABSW node that will be expanded to NEGW+MAX at isel.
// This allows us to remember that the result is sign extended. Expanding
// to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits.
// Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y)
// Freeze the source so we can increase its use count.
// Copy sign bit to all bits using the sraiw pattern.
// NOTE: The result is only required to be anyextended, but sext is
// consistent with type legalization of sub.
EVT VT =
N->getValueType(0);
13302if (VT == MVT::i16 &&
13304 (Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()))) {
13307 }
elseif (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.
is64Bit() &&
13312 }
elseif (VT == MVT::i64 && Op0VT == MVT::f64 && !Subtarget.
is64Bit() &&
13315 DAG.
getVTList(MVT::i32, MVT::i32), Op0);
13321// Custom-legalize bitcasts from fixed-length vector types to illegal 13322// scalar types in order to improve codegen. Bitcast the vector to a 13323// one-element vector type whose element type is the same as the result 13324// type, and extract the first element. 13336MVT VT =
N->getSimpleValueType(0);
13338assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.
is64Bit())) &&
13339"Unexpected custom legalisation");
13342"Unexpected extension");
13345// ReplaceNodeResults requires we maintain the same type for the return 13351// Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element 13352// type is illegal (currently only vXi64 RV32). 13353// With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are 13354// transferred to the destination register. We issue two of these from the 13355// upper- and lower- halves of the SEW-bit vector element, slid down to the 13360// The vector type hasn't been legalized yet so we can't issue target 13361// specific nodes if it needs legalization. 13362// FIXME: We would manually legalize if it's important. 13370"Unexpected EXTRACT_VECTOR_ELT legalization");
13372// If this is a fixed vector, we need to convert it to a scalable vector. 13373MVT ContainerVT = VecVT;
13381// Use a VL of 1 to avoid processing more elements than we need. 13384// Unless the index is known to be 0, we must slide the vector down to get 13385// the desired element into index 0. 13391// Extract the lower XLEN bits of the correct vector element. 13394// To extract the upper XLEN bits of the vector element, shift the first 13395// element right by 32 bits and re-extract the lower XLEN bits. 13401 DAG.
getUNDEF(ContainerVT), Mask, VL);
13409unsigned IntNo =
N->getConstantOperandVal(0);
13413"Don't know how to custom type legalize this intrinsic!");
13414case Intrinsic::experimental_get_vector_length: {
13419case Intrinsic::experimental_cttz_elts: {
13425case Intrinsic::riscv_orc_b:
13426case Intrinsic::riscv_brev8:
13427case Intrinsic::riscv_sha256sig0:
13428case Intrinsic::riscv_sha256sig1:
13429case Intrinsic::riscv_sha256sum0:
13430case Intrinsic::riscv_sha256sum1:
13431case Intrinsic::riscv_sm3p0:
13432case Intrinsic::riscv_sm3p1: {
13433if (!Subtarget.
is64Bit() ||
N->getValueType(0) != MVT::i32)
13453case Intrinsic::riscv_sm4ks:
13454case Intrinsic::riscv_sm4ed: {
13462 DAG.
getNode(Opc,
DL, MVT::i64, NewOp0, NewOp1,
N->getOperand(3));
13466case Intrinsic::riscv_mopr: {
13467if (!Subtarget.
is64Bit() ||
N->getValueType(0) != MVT::i32)
13477case Intrinsic::riscv_moprr: {
13478if (!Subtarget.
is64Bit() ||
N->getValueType(0) != MVT::i32)
13490case Intrinsic::riscv_clmul: {
13491if (!Subtarget.
is64Bit() ||
N->getValueType(0) != MVT::i32)
13502case Intrinsic::riscv_clmulh:
13503case Intrinsic::riscv_clmulr: {
13504if (!Subtarget.
is64Bit() ||
N->getValueType(0) != MVT::i32)
13507// Extend inputs to XLen, and shift by 32. This will add 64 trailing zeros 13508// to the full 128-bit clmul result of multiplying two xlen values. 13509// Perform clmulr or clmulh on the shifted values. Finally, extract the 13512// The alternative is to mask the inputs to 32 bits and use clmul, but 13513// that requires two shifts to mask each input without zext.w. 13514// FIXME: If the inputs are known zero extended or could be freely 13515// zero extended, the mask form would be better. 13532case Intrinsic::riscv_vmv_x_s: {
13533EVT VT =
N->getValueType(0);
13536// Simple case just extract using vmv.x.s and truncate. 13544"Unexpected custom legalization");
13546// We need to do the move in two steps. 13550// First extract the lower XLEN bits of the element. 13553// To extract the upper XLEN bits of the vector element, shift the first 13554// element right by 32 bits and re-extract the lower XLEN bits. 13582case ISD::VP_REDUCE_ADD:
13583case ISD::VP_REDUCE_AND:
13584case ISD::VP_REDUCE_OR:
13585case ISD::VP_REDUCE_XOR:
13586case ISD::VP_REDUCE_SMAX:
13587case ISD::VP_REDUCE_UMAX:
13588case ISD::VP_REDUCE_SMIN:
13589case ISD::VP_REDUCE_UMIN:
13603/// Given a binary operator, return the *associative* generic ISD::VECREDUCE_OP 13604/// which corresponds to it. 13626// Note: This is the associative form of the generic reduction opcode. 13631/// Perform two related transforms whose purpose is to incrementally recognize 13632/// an explode_vector followed by scalar reduction as a vector reduction node. 13633/// This exists to recover from a deficiency in SLP which can't handle 13634/// forests with multiple roots sharing common nodes. In some cases, one 13635/// of the trees will be vectorized, and the other will remain (unprofitably) 13641// This transforms need to run before all integer types have been legalized 13642// to i64 (so that the vector element type matches the add type), and while 13643// it's safe to introduce odd sized vector types. 13647// Without V, this transform isn't useful. We could form the (illegal) 13648// operations and let them be scalarized again, but there's really no point. 13653constEVT VT =
N->getValueType(0);
13654constunsigned Opc =
N->getOpcode();
13656// For FADD, we only handle the case with reassociation allowed. We 13657// could handle strict reduction order, but at the moment, there's no 13658// known reason to, and the complexity isn't worth it. 13659// TODO: Handle fminnum and fmaxnum here 13661 (Opc !=
ISD::FADD || !
N->getFlags().hasAllowReassociation()))
13666"Inconsistent mappings");
13677 !isa<ConstantSDNode>(
RHS.getOperand(1)))
13680uint64_t RHSIdx = cast<ConstantSDNode>(
RHS.getOperand(1))->getLimitedValue();
13690// match binop (extract_vector_elt V, 0), (extract_vector_elt V, 1) to 13691// reduce_op (extract_subvector [2 x VT] from V). This will form the 13692// root of our reduction tree. TODO: We could extend this to any two 13693// adjacent aligned constant indices if desired. 13695LHS.getOperand(0) == SrcVec && isa<ConstantSDNode>(
LHS.getOperand(1))) {
13697 cast<ConstantSDNode>(
LHS.getOperand(1))->getLimitedValue();
13698if (0 == std::min(LHSIdx, RHSIdx) && 1 == std::max(LHSIdx, RHSIdx)) {
13702return DAG.
getNode(ReduceOpc,
DL, VT, Vec,
N->getFlags());
13706// Match (binop (reduce (extract_subvector V, 0), 13707// (extract_vector_elt V, sizeof(SubVec)))) 13708// into a reduction of one more element from the original vector V. 13709if (
LHS.getOpcode() != ReduceOpc)
13717// For illegal types (e.g. 3xi32), most will be combined again into a 13718// wider (hopefully legal) type. If this is a terminal state, we are 13719// relying on type legalization here to produce something reasonable 13720// and this lowering quality could probably be improved. (TODO) 13724return DAG.
getNode(ReduceOpc,
DL, VT, Vec,
13732// Try to fold (<bop> x, (reduction.<bop> vec, start)) 13735auto BinOpToRVVReduce = [](
unsigned Opc) {
13764auto IsReduction = [&BinOpToRVVReduce](
SDValue V,
unsigned Opc) {
13767 V.getOperand(0).getOpcode() == BinOpToRVVReduce(Opc);
13770unsigned Opc =
N->getOpcode();
13772if (IsReduction(
N->getOperand(0), Opc))
13774elseif (IsReduction(
N->getOperand(1), Opc))
13779// Skip if FADD disallows reassociation but the combiner needs. 13780if (Opc ==
ISD::FADD && !
N->getFlags().hasAllowReassociation())
13783SDValue Extract =
N->getOperand(ReduceIdx);
13795// Make sure that ScalarV is a splat with VL=1. 13804// Check the scalar of ScalarV is neutral element 13805// TODO: Deal with value other than neutral element. 13810// If the AVL is zero, operand 0 will be returned. So it's not safe to fold. 13811// FIXME: We might be able to improve this if operand 0 is undef. 13815SDValue NewStart =
N->getOperand(1 - ReduceIdx);
13822// If we looked through an INSERT_SUBVECTOR we need to restore it. 13837// Optimize (add (shl x, c0), (shl y, c1)) -> 13838// (SLLI (SH*ADD x, y), c0), if c1-c0 equals to [1|2|3]. 13841// Perform this optimization only in the zba extension. 13842if (!Subtarget.hasStdExtZba())
13845// Skip for vector types and larger types. 13846EVT VT =
N->getValueType(0);
13850// The two operand nodes must be SHL and have no other use. 13858auto *N0C = dyn_cast<ConstantSDNode>(N0->
getOperand(1));
13859auto *N1C = dyn_cast<ConstantSDNode>(N1->
getOperand(1));
13862 int64_t C0 = N0C->getSExtValue();
13863 int64_t C1 = N1C->getSExtValue();
13864if (C0 <= 0 || C1 <= 0)
13867// Skip if SH1ADD/SH2ADD/SH3ADD are not applicable. 13868 int64_t Bits = std::min(C0, C1);
13869 int64_t Diff = std::abs(C0 - C1);
13870if (Diff != 1 && Diff != 2 && Diff != 3)
13882// Combine a constant select operand into its use: 13884// (and (select cond, -1, c), x) 13885// -> (select cond, x, (and x, c)) [AllOnes=1] 13886// (or (select cond, 0, c), x) 13887// -> (select cond, x, (or x, c)) [AllOnes=0] 13888// (xor (select cond, 0, c), x) 13889// -> (select cond, x, (xor x, c)) [AllOnes=0] 13890// (add (select cond, 0, c), x) 13891// -> (select cond, x, (add x, c)) [AllOnes=0] 13892// (sub x, (select cond, 0, c)) 13893// -> (select cond, x, (sub x, c)) [AllOnes=0] 13897EVT VT =
N->getValueType(0);
13904// (select cond, x, (and x, c)) has custom lowering with Zicond. 13905if ((!Subtarget.hasStdExtZicond() &&
13906 !Subtarget.hasVendorXVentanaCondOps()) ||
13910// Maybe harmful when condition code has multiple use. 13914// Maybe harmful when VT is wider than XLen. 13934 SwapSelectOps =
false;
13935 NonConstantVal = FalseVal;
13937 SwapSelectOps =
true;
13938 NonConstantVal = TrueVal;
13942// Slct is now know to be the desired identity constant when CC is true. 13944 FalseVal = DAG.
getNode(
N->getOpcode(),
SDLoc(
N), VT, OtherOp, NonConstantVal);
13945// Unless SwapSelectOps says the condition should be false. 13958// Attempt combineSelectAndUse on each operand of a commutative operator N. 13971// Transform (add (mul x, c0), c1) -> 13972// (add (mul (add x, c1/c0), c0), c1%c0). 13973// if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case 13974// that should be excluded is when c0*(c1/c0) is simm12, which will lead 13975// to an infinite loop in DAGCombine if transformed. 13976// Or transform (add (mul x, c0), c1) -> 13977// (add (mul (add x, c1/c0+1), c0), c1%c0-c0), 13978// if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner 13979// case that should be excluded is when c0*(c1/c0+1) is simm12, which will 13980// lead to an infinite loop in DAGCombine if transformed. 13981// Or transform (add (mul x, c0), c1) -> 13982// (add (mul (add x, c1/c0-1), c0), c1%c0+c0), 13983// if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner 13984// case that should be excluded is when c0*(c1/c0-1) is simm12, which will 13985// lead to an infinite loop in DAGCombine if transformed. 13986// Or transform (add (mul x, c0), c1) -> 13987// (mul (add x, c1/c0), c0). 13988// if c1%c0 is zero, and c1/c0 is simm12 while c1 is not. 13991// Skip for vector types and larger types. 13992EVT VT =
N->getValueType(0);
13995// The first operand node must be a MUL and has no other use. 13999// Check if c0 and c1 match above conditions. 14000auto *N0C = dyn_cast<ConstantSDNode>(N0->
getOperand(1));
14001auto *N1C = dyn_cast<ConstantSDNode>(
N->getOperand(1));
14004// If N0C has multiple uses it's possible one of the cases in 14005// DAGCombiner::isMulAddWithConstProfitable will be true, which would result 14006// in an infinite loop. 14007if (!N0C->hasOneUse())
14009 int64_t C0 = N0C->getSExtValue();
14010 int64_t C1 = N1C->getSExtValue();
14012if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1))
14014// Search for proper CA (non-zero) and CB that both are simm12. 14015if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) &&
14016 !isInt<12>(C0 * (C1 / C0))) {
14019 }
elseif ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) &&
14020 isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) {
14023 }
elseif ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) &&
14024 isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) {
// Build new nodes (add (mul (add x, c1/c0), c0), c1%c0).
// add (zext, zext) -> zext (add (zext, zext))
// sub (zext, zext) -> sext (sub (zext, zext))
// mul (zext, zext) -> zext (mul (zext, zext))
// sdiv (zext, zext) -> zext (sdiv (zext, zext))
// udiv (zext, zext) -> zext (udiv (zext, zext))
// srem (zext, zext) -> zext (srem (zext, zext))
// urem (zext, zext) -> zext (urem (zext, zext))
// where the sum of the extend widths match, and the range of the bin op
// fits inside the width of the narrower bin op. (For profitability on rvv, we
// use a power of two for both inner and outer extend.)
EVT VT =
N->getValueType(0);
14077// Src0 and Src1 are zero extended, so they're always positive if signed. 14079// sub can produce a negative from two positive operands, so it needs sign 14080// extended. Other nodes produce a positive from two positive operands, so 14081// zero extend instead. 14082unsigned OuterExtend =
14086 OuterExtend,
SDLoc(
N), VT,
14090// Try to turn (add (xor bool, 1) -1) into (neg bool). 14094EVT VT =
N->getValueType(0);
14097// RHS should be -1. 14101// Look for (xor X, 1). 14105// First xor input should be 0 or 1. 14110// Emit a negate of the setcc. 14133// fold (add (select lhs, rhs, cc, 0, y), x) -> 14134// (select lhs, rhs, cc, x, (add x, y)) 14138// Try to turn a sub boolean RHS and constant LHS into an addi. 14142EVT VT =
N->getValueType(0);
14145// Require a constant LHS. 14146auto *N0C = dyn_cast<ConstantSDNode>(N0);
14150// All our optimizations involve subtracting 1 from the immediate and forming 14151// an ADDI. Make sure the new immediate is valid for an ADDI. 14152APInt ImmValMinus1 = N0C->getAPIntValue() - 1;
14158// (sub constant, (setcc x, y, eq/neq)) -> 14159// (add (setcc x, y, neq/eq), constant - 1) 14162if (!isIntEqualitySetCC(CCVal) || !SetCCOpVT.
isInteger())
14169// (sub C, (xor (setcc), 1)) -> (add (setcc), C-1). 14170// Since setcc returns a bool the xor is equivalent to 1-setcc. 14179// Looks for (sub (shl X, 8-Y), (shr X, Y)) where the Y-th bit in each byte is 14180// potentially set. It is fine for Y to be 0, meaning that (sub (shl X, 8), X) 14181// is also valid. Replace with (orc.b X). For example, 0b0000_1000_0000_1000 is 14182// valid with Y=3, while 0b0000_1000_0000_0100 is not. 14185if (!Subtarget.hasStdExtZbb())
14188EVT VT =
N->getValueType(0);
14190if (VT != Subtarget.
getXLenVT() && VT != MVT::i32 && VT != MVT::i16)
14199auto *ShAmtCLeft = dyn_cast<ConstantSDNode>(N0.
getOperand(1));
14202unsigned ShiftedAmount = 8 - ShAmtCLeft->getZExtValue();
14204if (ShiftedAmount >= 8)
14208SDValue RightShiftOperand = N1;
14210if (ShiftedAmount != 0) {
// Right operand must be a right shift. 14213auto *ShAmtCRight = dyn_cast<ConstantSDNode>(N1.
getOperand(1));
14214if (!ShAmtCRight || ShAmtCRight->getZExtValue() != ShiftedAmount)
14219// At least one shift should have a single use. 14223if (LeftShiftOperand != RightShiftOperand)
14227 Mask <<= ShiftedAmount;
14228// Check that X has indeed the right shape (only the Y-th bit can be set in 14241EVT VT =
N->getValueType(0);
14244// fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1) 14261// fold (sub x, (select lhs, rhs, cc, 0, y)) -> 14262// (select lhs, rhs, cc, x, (sub x, y)) 14266// Apply DeMorgan's law to (and/or (xor X, 1), (xor Y, 1)) if X and Y are 0/1. 14267// Legalizing setcc can introduce xors like this. Doing this transform reduces 14268// the number of xors and may allow the xor to fold into a branch condition. 14283// For AND, SimplifyDemandedBits may have turned one of the (xor X, 1) into 14284// (xor X, -1) based on the upper bits of the other operand being 0. If the 14285// operation is And, allow one of the Xors to use -1. 14290// N01 and N11 being 1 was already handled. Handle N11==1 and N01==-1. 14296EVT VT =
N->getValueType(0);
14301// The LHS of the xors needs to be 0/1. 14306// Invert the opcode and insert a new xor. 14313// Fold (vXi8 (trunc (vselect (setltu, X, 256), X, (sext (setgt X, 0))))) to 14314// (vXi8 (trunc (smin (smax X, 0), 255))). This represents saturating a signed 14315// value to an unsigned value. This will be lowered to vmax and series of 14316// vnclipu instructions later. This can be extended to other truncated types 14317// other than i8 by replacing 256 and 255 with the equivalent constants for the 14320EVT VT =
N->getValueType(0);
14338// FIXME: Support the version of this pattern with the select operands 14347if (CondLHS != True)
14352// FIXME: Support other constants. 14354if (!CondRHSC || CondRHSC->
getAPIntValue() != (1ULL << ScalarBits))
14366if (!FalseRHSC || !FalseRHSC->
isZero())
14373// Emit the signed to unsigned saturation pattern. 14386EVT VT =
N->getValueType(0);
14388// Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero 14389// extending X. This is safe since we only need the LSB after the shift and 14390// shift amounts larger than 31 would produce poison. If we wait until 14391// type legalization, we'll create RISCVISD::SRLW and we can't recover it 14392// to use a BEXT instruction. 14393if (Subtarget.
is64Bit() && Subtarget.hasStdExtZbs() && VT == MVT::i1 &&
14406// Combines two comparison operation and logic operation to one selection 14407// operation(min, max) and logic operation. Returns new constructed Node if 14408// conditions for optimization are satisfied. 14415// Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero 14416// extending X. This is safe since we only need the LSB after the shift and 14417// shift amounts larger than 31 would produce poison. If we wait until 14418// type legalization, we'll create RISCVISD::SRLW and we can't recover it 14419// to use a BEXT instruction. 14420if (Subtarget.
is64Bit() && Subtarget.hasStdExtZbs() &&
14442// fold (and (select lhs, rhs, cc, -1, y), x) -> 14443// (select lhs, rhs, cc, x, (and x, y)) 14447// Try to pull an xor with 1 through a select idiom that uses czero_eqz/nez. 14448// FIXME: Generalize to other binary operators with same operand. 14458// Should have the same condition. 14472EVT VT =
N->getValueType(0);
14496// Look for Or of CZERO_EQZ/NEZ with same condition which is the select idiom. 14497// We may be able to pull a common operation out of the true and false value. 14505// fold (or (select cond, 0, y), x) -> 14506// (select cond, x, (or x, y)) 14515// Pre-promote (i32 (xor (shl -1, X), ~0)) on RV64 with Zbs so we can use 14516// (ADDI (BSET X0, X), -1). If we wait until/ type legalization, we'll create 14517// RISCVISD:::SLLW and we can't recover it to use a BSET instruction. 14518if (Subtarget.
is64Bit() && Subtarget.hasStdExtZbs() &&
14530// fold (xor (sllw 1, x), -1) -> (rolw ~1, x) 14531// NOTE: Assumes ROL being legal means ROLW is legal. 14541// Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt) 14543auto *ConstN00 = dyn_cast<ConstantSDNode>(N0.
getOperand(0));
14548constAPInt &Imm = ConstN00->getAPIntValue();
14549if ((Imm + 1).isSignedIntN(12))
14560// fold (xor (select cond, 0, y), x) -> 14561// (select cond, x, (xor x, y)) 14565// Try to expand a scalar multiply to a faster sequence. 14570EVT VT =
N->getValueType(0);
14572// LI + MUL is usually smaller than the alternative sequence. 14582constbool HasShlAdd =
14583 Subtarget.hasStdExtZba() || Subtarget.hasVendorXTHeadBa();
14590// WARNING: The code below is knowingly incorrect with regards to undef semantics. 14591// We're adding additional uses of X here, and in principle, we should be freezing 14592// X before doing so. However, adding freeze here causes real regressions, and no 14593// other target properly freezes X in these cases either. 14597for (
uint64_t Divisor : {3, 5, 9}) {
14598if (MulAmt % Divisor != 0)
14600uint64_t MulAmt2 = MulAmt / Divisor;
14601// 3/5/9 * 2^N -> shl (shXadd X, X), N 14605// Put the shift first if we can fold a zext into the 14606// shift forming a slli.uw. 14607if (
X.getOpcode() ==
ISD::AND && isa<ConstantSDNode>(
X.getOperand(1)) &&
14608X.getConstantOperandVal(1) == UINT64_C(0xffffffff)) {
// Otherwise, put the shl second so that it can fold with following
// instructions (e.g. sext or add).
// 3/5/9 * 3/5/9 -> shXadd (shYadd X, X), (shYadd X, X)
if (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9) {
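// e.g. (illustrative): MulAmt == 45 == 9 * 5 becomes
// (sh2add (sh3add X, X), (sh3add X, X)), i.e. 5 * (9 * X).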
14636// If this is a power 2 + 2/4/8, we can use a shift followed by a single 14637// shXadd. First check if this a sum of two power of 2s because that's 14638// easy. Then count how many zeros are up to the first bit. 14641if (ScaleShift >= 1 && ScaleShift < 4) {
14642unsigned ShiftAmt =
Log2_64((MulAmt & (MulAmt - 1)));
14651// 2^(1,2,3) * 3,5,9 + 1 -> (shXadd (shYadd x, x), x) 14652// This is the two instruction form, there are also three instruction 14653// variants we could implement. e.g. 14654// (2^(1,2,3) * 3,5,9 + 1) << C2 14655// 2^(C1>3) * 3,5,9 +/- 1 14656for (
uint64_t Divisor : {3, 5, 9}) {
14661if ((
C >> TZ) == Divisor && (TZ == 1 || TZ == 2 || TZ == 3)) {
14671// 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X)) 14672if (MulAmt > 2 &&
isPowerOf2_64((MulAmt - 1) & (MulAmt - 2))) {
14674if (ScaleShift >= 1 && ScaleShift < 4) {
14675unsigned ShiftAmt =
Log2_64(((MulAmt - 1) & (MulAmt - 2)));
14685// 2^N - 3/5/9 --> (sub (shl X, C1), (shXadd X, x)) 14700// 2^N - 2^M -> (sub (shl X, C1), (shl X, C2)) 14701uint64_t MulAmtLowBit = MulAmt & (-MulAmt);
14703uint64_t ShiftAmt1 = MulAmt + MulAmtLowBit;
14714for (
uint64_t Divisor : {3, 5, 9}) {
14715if (MulAmt % Divisor != 0)
14717uint64_t MulAmt2 = MulAmt / Divisor;
14718// 3/5/9 * 3/5/9 * 2^N - In particular, this covers multiples 14719// of 25 which happen to be quite common. 14720for (
uint64_t Divisor2 : {3, 5, 9}) {
14721if (MulAmt2 % Divisor2 != 0)
14723uint64_t MulAmt3 = MulAmt2 / Divisor2;
14742// Combine vXi32 (mul (and (lshr X, 15), 0x10001), 0xffff) -> 14743// (bitcast (sra (v2Xi16 (bitcast X)), 15)) 14744// Same for other equivalent types with other equivalent constants. 14746EVT VT =
N->getValueType(0);
14749// Do this for legal vectors unless they are i1 or i8 vectors. 14753if (
N->getOperand(0).getOpcode() !=
ISD::AND ||
14754N->getOperand(0).getOperand(0).getOpcode() !=
ISD::SRL)
14767if (!V1.
isMask(HalfSize) || V2 != (1ULL | 1ULL << HalfSize) ||
14768 V3 != (HalfSize - 1))
14784EVT VT =
N->getValueType(0);
14794// vmadd: (mul (add x, 1), y) -> (add (mul x, y), y) 14795// (mul x, add (y, 1)) -> (add x, (mul x, y)) 14796// vnmsub: (mul (sub 1, x), y) -> (sub y, (mul x, y)) 14797// (mul x, (sub 1, y)) -> (sub x, (mul x, y)) 14798auto IsAddSubWith1 = [&](
SDValue V) ->
bool {
14799 AddSubOpc = V->getOpcode();
14801SDValue Opnd = V->getOperand(1);
14802 MulOper = V->getOperand(0);
14811if (IsAddSubWith1(N0)) {
14813return DAG.
getNode(AddSubOpc,
DL, VT, N1, MulVal);
14816if (IsAddSubWith1(N1)) {
14818return DAG.
getNode(AddSubOpc,
DL, VT, N0, MulVal);
14830/// According to the property that indexed load/store instructions zero-extend 14831/// their indices, try to narrow the type of index operand. 14833if (isIndexTypeSigned(IndexType))
14836if (!
N->hasOneUse())
14839EVT VT =
N.getValueType();
14842// In general, what we're doing here is seeing if we can sink a truncate to 14843// a smaller element type into the expression tree building our index. 14844// TODO: We can generalize this and handle a bunch more cases if useful. 14846// Narrow a buildvector to the narrowest element type. This requires less 14847// work and less register pressure at high LMUL, and creates smaller constants 14848// which may be cheaper to materialize. 14861// Handle the pattern (shl (zext x to ty), C) and bits(x) + C < bits(ty). 14878EVT SrcVT = Src.getValueType();
14882 NewElen = std::max(NewElen, 8U);
14884// Skip if NewElen is not narrower than the original extended type. 14897// Replace (seteq (i64 (and X, 0xffffffff)), C1) with 14898// (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from 14899// bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg 14900// can become a sext.w instead of a shift pair. 14905EVT VT =
N->getValueType(0);
14908if (OpVT != MVT::i64 || !Subtarget.
is64Bit())
14911// RHS needs to be a constant. 14912auto *N1C = dyn_cast<ConstantSDNode>(N1);
14916// LHS needs to be (and X, 0xffffffff). 14922// Looking for an equality compare. 14924if (!isIntEqualitySetCC(
Cond))
14927// Don't do this if the sign bit is provably zero, it will be turned back into 14933constAPInt &C1 = N1C->getAPIntValue();
14936// If the constant is larger than 2^32 - 1 it is impossible for both sides 14951EVT VT =
N->getValueType(0);
14952EVT SrcVT = cast<VTSDNode>(
N->getOperand(1))->
getVT();
14953unsigned Opc = Src.getOpcode();
14955// Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X) 14956// Don't do this with Zhinx. We need to explicitly sign extend the GPR. 14958 Subtarget.hasStdExtZfhmin())
14960 Src.getOperand(0));
14962// Fold (sext_inreg (shl X, Y), i32) -> (sllw X, Y) iff Y u< 32 14964 VT == MVT::i64 && !isa<ConstantSDNode>(Src.getOperand(1)) &&
14967 Src.getOperand(1));
14973// Forward declaration of the structure holding the necessary information to 14975structCombineResult;
14977enum ExtKind :
uint8_t { ZExt = 1 << 0, SExt = 1 << 1, FPExt = 1 << 2 };
14978/// Helper class for folding sign/zero extensions. 14979/// In particular, this class is used for the following combines: 14980/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w 14981/// sub | sub_vl -> vwsub(u) | vwsub(u)_w 14982/// mul | mul_vl -> vwmul(u) | vwmul_su 14983/// shl | shl_vl -> vwsll 14984/// fadd -> vfwadd | vfwadd_w 14985/// fsub -> vfwsub | vfwsub_w 14987/// An object of this class represents an operand of the operation we want to 14989/// E.g., when trying to combine `mul_vl a, b`, we will have one instance of 14990/// NodeExtensionHelper for `a` and one for `b`. 14992/// This class abstracts away how the extension is materialized and 14993/// how its number of users affect the combines. 14996/// - VWADD_W is conceptually == add(op0, sext(op1)) 14997/// - VWADDU_W == add(op0, zext(op1)) 14998/// - VWSUB_W == sub(op0, sext(op1)) 14999/// - VWSUBU_W == sub(op0, zext(op1)) 15000/// - VFWADD_W == fadd(op0, fpext(op1)) 15001/// - VFWSUB_W == fsub(op0, fpext(op1)) 15002/// And VMV_V_X_VL, depending on the value, is conceptually equivalent to 15003/// zext|sext(smaller_value). 15004structNodeExtensionHelper {
15005 /// Records if this operand is like being zero extended. 15007 /// Records if this operand is like being sign extended. 15008 /// Note: SupportsZExt and SupportsSExt are not mutually exclusive. For 15009 /// instance, a splat constant (e.g., 3), would support being both sign and 15012 /// Records if this operand is like being floating-Point extended. 15014 /// This boolean captures whether we care if this operand would still be 15015 /// around after the folding happens. 15017 /// Original value that this NodeExtensionHelper represents. 15020 /// Get the value feeding the extension or the value itself. 15021 /// E.g., for zext(a), this would return a. 15035 /// Check if this instance represents a splat. 15041 /// Get the extended opcode. 15042unsigned getExtOpc(ExtKind SupportsExt)
const{
15043switch (SupportsExt) {
15048case ExtKind::FPExt:
  /// Get or create a value that can feed \p Root with the given extension \p
  /// SupportsExt. If \p SExt is std::nullopt, this returns the source of this
  /// operand. \see ::getSource().
                            std::optional<ExtKind> SupportsExt) const {
15060if (!SupportsExt.has_value())
15063MVT NarrowVT = getNarrowType(Root, *SupportsExt);
15067if (
Source.getValueType() == NarrowVT)
15070// vfmadd_vl -> vfwmadd_vl can take bf16 operands 15071if (
Source.getValueType().getVectorElementType() == MVT::bf16) {
15077unsigned ExtOpc = getExtOpc(*SupportsExt);
15079// If we need an extension, we should be changing the type. 15081auto [
Mask, VL] = getMaskAndVL(Root, DAG, Subtarget);
15088return DAG.
getNode(ExtOpc,
DL, NarrowVT, Source, Mask, VL);
15100 DAG.
getUNDEF(NarrowVT), Source, VL);
    // Other opcodes can only come from the original LHS of VW(ADD|SUB)_W_VL
    // and that operand should already have the right NarrowVT so no
    // extension should be required at this point.

  /// Helper function to get the narrow type for \p Root.
  /// The narrow type is the type of \p Root where we divided the size of each
  /// element by 2. E.g., if Root's type is <2 x i16>, the narrow type is <2 x i8>.
  /// \pre Both the narrow type and the original type should be legal.
  static MVT getNarrowType(const SDNode *Root, ExtKind SupportsExt) {
    // Determine the narrow size.
    MVT EltVT = SupportsExt == ExtKind::FPExt
                    ? MVT::getFloatingPointVT(NarrowSize)
                    : MVT::getIntegerVT(NarrowSize);
    assert((int)NarrowSize >= (SupportsExt == ExtKind::FPExt ? 16 : 8) &&
           "Trying to extend something we can't represent");
  /// Get the opcode to materialize:
  /// Opcode(sext(a), sext(b)) -> newOpcode(a, b)
  static unsigned getSExtOpcode(unsigned Opcode) {

  /// Get the opcode to materialize:
  /// Opcode(zext(a), zext(b)) -> newOpcode(a, b)
  static unsigned getZExtOpcode(unsigned Opcode) {

  /// Get the opcode to materialize:
  /// Opcode(fpext(a), fpext(b)) -> newOpcode(a, b)
  static unsigned getFPExtOpcode(unsigned Opcode) {
  /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) ->
  /// newOpcode(a, b).
  static unsigned getSUOpcode(unsigned Opcode) {
           "SU is only supported for MUL");
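  // Illustrative sketch (assumption): scalar model of the "SU" form produced
  // here, i.e. Opcode(sext(a), zext(b)) for MUL, which lowers to vwmulsu.
  // Shown for i8 -> i16; the helper name is hypothetical.
  static int16_t modelVWMulSU(int8_t A, uint8_t B) {
    return int16_t(int16_t(A) * uint16_t(B)); // vwmulsu: mul(sext(a), zext(b))
  }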
  /// Get the opcode to materialize
  /// \p Opcode(a, s|z|fpext(b)) -> newOpcode(a, b).
  static unsigned getWOpcode(unsigned Opcode, ExtKind SupportsExt) {
  using CombineToTry = std::function<std::optional<CombineResult>(
      SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/,
  /// Check if this node needs to be fully folded or extended for all users.
  bool needToPromoteOtherUsers() const { return EnforceOneUse; }
           "Unexpected Opcode");
    // The passthru must be undef for tail agnostic.
    // Get the scalar value.
    // See if we have enough sign bits or zero bits in the scalar to use a
    // widening opcode by splatting to a smaller element size.
    unsigned ScalarBits = Op.getValueSizeInBits();
    // If we're not getting all bits from the element, we need special handling.
    if (ScalarBits < EltBits) {
      // This should only occur on RV32.
             !Subtarget.is64Bit() && "Unexpected splat");
      // vmv.v.x sign extends narrow inputs.
      SupportsSExt = true;
      // If the input is positive, then sign extend is also zero extend.
        SupportsZExt = true;
      EnforceOneUse = false;
15277unsigned NarrowSize = EltBits / 2;
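    // Illustrative sketch (assumption): the reasoning a few lines above. For a
    // non-negative narrow value the sign extension and the zero extension
    // agree, which is why SupportsZExt can be set alongside SupportsSExt.
    // The helper name is hypothetical.
    static bool sextEqualsZextWhenNonNegative(int32_t V) {
      if (V < 0)
        return true; // Only the non-negative case is claimed.
      return int64_t(V) == int64_t(uint64_t(uint32_t(V)));
    }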
15278// If the narrow type cannot be expressed with a legal VMV, 15279// this is not a valid candidate. 15284 SupportsSExt =
true;
15288 SupportsZExt =
true;
15290 EnforceOneUse =
false;
  bool isSupportedFPExtend(SDNode *Root, MVT NarrowEltVT,
    // Any f16 extension will need zvfh.
    // The only bf16 extension we can do is vfmadd_vl -> vfwmadd_vl with
    // Zvfbfwma.
    if (NarrowEltVT == MVT::bf16 && (!Subtarget.hasStdExtZvfbfwma() ||

  /// Helper method to set the various fields of this struct based on the
  /// type of \p Root.
    SupportsZExt = false;
    SupportsSExt = false;
    SupportsFPExt = false;
    EnforceOneUse = true;

    // For the nodes we handle below, we end up using their inputs directly: see
    // getSource(). However since they either don't have a passthru or we check
    // that their passthru is undef, we can safely ignore their mask and VL.

    // i1 types are legal but we can't select V{S,Z}EXT_VLs with them.
      SupportsZExt = true;
15339 SupportsSExt =
true;
15344if (!isSupportedFPExtend(Root, NarrowEltVT, Subtarget))
15346 SupportsFPExt =
true;
15351 fillUpExtensionSupportForSplat(Root, DAG, Subtarget);
15363if (!isSupportedFPExtend(Root,
Op.getOperand(0).getSimpleValueType(),
15368unsigned ScalarBits =
Op.getOperand(0).getValueSizeInBits();
15369if (NarrowSize != ScalarBits)
15372 SupportsFPExt =
true;
15380 /// Check if \p Root supports any extension folding combines. 15381staticbool isSupportedRoot(
constSDNode *Root,
15393// Vector Widening Integer Add/Sub/Mul Instructions 15401// Vector Widening Floating-Point Add/Sub/Mul Instructions 15410 Subtarget.hasStdExtZvbb();
15412return Subtarget.hasStdExtZvbb();
  /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx).
    assert(isSupportedRoot(Root, Subtarget) &&
           "Trying to build a helper with an unsupported root");
    assert(OperandIdx < 2 && "Requesting something else than LHS or RHS");
15436// VW<ADD|SUB>_W(LHS, RHS) -> <ADD|SUB>(LHS, SEXT(RHS)) 15437// VW<ADD|SUB>U_W(LHS, RHS) -> <ADD|SUB>(LHS, ZEXT(RHS)) 15438// VFW<ADD|SUB>_W(LHS, RHS) -> F<ADD|SUB>(LHS, FPEXT(RHS)) 15445if (OperandIdx == 1) {
15452// There's no existing extension here, so we don't have to worry about 15453// making sure it gets removed. 15454 EnforceOneUse =
false;
15459 fillUpExtensionSupport(Root, DAG, Subtarget);
15464 /// Helper function to get the Mask and VL from \p Root. 15465static std::pair<SDValue, SDValue>
15468assert(isSupportedRoot(Root, Subtarget) &&
"Unexpected root");
15484 /// Helper function to check if \p N is commutative with respect to the 15485 /// foldings that are supported by this class. 15487switch (
N->getOpcode()) {
  /// Get a list of combines to try for folding extensions in \p Root.
  /// Note that each returned CombineToTry function doesn't actually modify
  /// anything. Instead they produce an optional CombineResult that, if not
  /// None, needs to be materialized for the combine to be applied.
  /// \see CombineResult::materialize.
  /// If the related CombineToTry function returns std::nullopt, that means the
  /// combine didn't match.

/// Helper structure that holds all the necessary information to materialize a
/// combine that does some extension folding.
struct CombineResult {
  /// Opcode to be generated when materializing the combine.
  unsigned TargetOpcode;
  // No value means no extension is needed.
  std::optional<ExtKind> LHSExt;
  std::optional<ExtKind> RHSExt;
  /// Root of the combine.
  /// LHS of the TargetOpcode.
  NodeExtensionHelper LHS;
  /// RHS of the TargetOpcode.
  NodeExtensionHelper RHS;
15542 CombineResult(
unsigned TargetOpcode,
SDNode *Root,
15543const NodeExtensionHelper &
LHS, std::optional<ExtKind> LHSExt,
15544const NodeExtensionHelper &
RHS, std::optional<ExtKind> RHSExt)
15545 : TargetOpcode(TargetOpcode), LHSExt(LHSExt), RHSExt(RHSExt), Root(Root),
15548 /// Return a value that uses TargetOpcode and that can be used to replace 15550 /// The actual replacement is *not* done in that method. 15554 std::tie(Mask, VL) =
15555 NodeExtensionHelper::getMaskAndVL(Root, DAG, Subtarget);
15569LHS.getOrCreateExtendedOp(Root, DAG, Subtarget, LHSExt),
15570RHS.getOrCreateExtendedOp(Root, DAG, Subtarget, RHSExt),
15571 Passthru, Mask, VL);
/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
/// are zext) and LHS and RHS can be folded into Root.
/// AllowExtMask defines which forms `ext` can take in this pattern.
///
/// \note If the pattern can match with both zext and sext, the returned
/// CombineResult will feature the zext result.
///
/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
/// can be used to apply the pattern.
static std::optional<CombineResult>
15586canFoldToVWWithSameExtensionImpl(
SDNode *Root,
const NodeExtensionHelper &LHS,
15587const NodeExtensionHelper &RHS,
15590if ((AllowExtMask & ExtKind::ZExt) &&
LHS.SupportsZExt &&
RHS.SupportsZExt)
15591return CombineResult(NodeExtensionHelper::getZExtOpcode(Root->
getOpcode()),
15592 Root, LHS,
/*LHSExt=*/{ExtKind::ZExt}, RHS,
15593/*RHSExt=*/{ExtKind::ZExt});
15594if ((AllowExtMask & ExtKind::SExt) &&
LHS.SupportsSExt &&
RHS.SupportsSExt)
15595return CombineResult(NodeExtensionHelper::getSExtOpcode(Root->
getOpcode()),
15596 Root, LHS,
/*LHSExt=*/{ExtKind::SExt}, RHS,
15597/*RHSExt=*/{ExtKind::SExt});
15598if ((AllowExtMask & ExtKind::FPExt) &&
LHS.SupportsFPExt &&
RHS.SupportsFPExt)
15599return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->
getOpcode()),
15600 Root, LHS,
/*LHSExt=*/{ExtKind::FPExt}, RHS,
15601/*RHSExt=*/{ExtKind::FPExt});
15602return std::nullopt;
15605/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS)) 15606/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both 15607/// are zext) and LHS and RHS can be folded into Root. 15609/// \returns std::nullopt if the pattern doesn't match or a CombineResult that 15610/// can be used to apply the pattern. 15611static std::optional<CombineResult>
15612canFoldToVWWithSameExtension(
SDNode *Root,
const NodeExtensionHelper &LHS,
15615return canFoldToVWWithSameExtensionImpl(
15616 Root, LHS, RHS, ExtKind::ZExt | ExtKind::SExt | ExtKind::FPExt, DAG,
15620/// Check if \p Root follows a pattern Root(LHS, ext(RHS)) 15622/// \returns std::nullopt if the pattern doesn't match or a CombineResult that 15623/// can be used to apply the pattern. 15624static std::optional<CombineResult>
15625canFoldToVW_W(
SDNode *Root,
const NodeExtensionHelper &LHS,
15628if (
RHS.SupportsFPExt)
15629return CombineResult(
15630 NodeExtensionHelper::getWOpcode(Root->
getOpcode(), ExtKind::FPExt),
15631 Root, LHS,
/*LHSExt=*/std::nullopt, RHS,
/*RHSExt=*/{ExtKind::FPExt});
15633// FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar 15635// Control this behavior behind an option (AllowSplatInVW_W) for testing 15638return CombineResult(
15639 NodeExtensionHelper::getWOpcode(Root->
getOpcode(), ExtKind::ZExt), Root,
15640 LHS,
/*LHSExt=*/std::nullopt, RHS,
/*RHSExt=*/{ExtKind::ZExt});
15642return CombineResult(
15643 NodeExtensionHelper::getWOpcode(Root->
getOpcode(), ExtKind::SExt), Root,
15644 LHS,
/*LHSExt=*/std::nullopt, RHS,
/*RHSExt=*/{ExtKind::SExt});
15645return std::nullopt;
15648/// Check if \p Root follows a pattern Root(sext(LHS), sext(RHS)) 15650/// \returns std::nullopt if the pattern doesn't match or a CombineResult that 15651/// can be used to apply the pattern. 15652static std::optional<CombineResult>
15653canFoldToVWWithSEXT(
SDNode *Root,
const NodeExtensionHelper &LHS,
15656return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::SExt, DAG,
15660/// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS)) 15662/// \returns std::nullopt if the pattern doesn't match or a CombineResult that 15663/// can be used to apply the pattern. 15664static std::optional<CombineResult>
15665canFoldToVWWithZEXT(
SDNode *Root,
const NodeExtensionHelper &LHS,
15668return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::ZExt, DAG,
15672/// Check if \p Root follows a pattern Root(fpext(LHS), fpext(RHS)) 15674/// \returns std::nullopt if the pattern doesn't match or a CombineResult that 15675/// can be used to apply the pattern. 15676static std::optional<CombineResult>
15677canFoldToVWWithFPEXT(
SDNode *Root,
const NodeExtensionHelper &LHS,
15680return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::FPExt, DAG,
15684/// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS)) 15686/// \returns std::nullopt if the pattern doesn't match or a CombineResult that 15687/// can be used to apply the pattern. 15688static std::optional<CombineResult>
15689canFoldToVW_SU(
SDNode *Root,
const NodeExtensionHelper &LHS,
15693if (!
LHS.SupportsSExt || !
RHS.SupportsZExt)
15694return std::nullopt;
15695return CombineResult(NodeExtensionHelper::getSUOpcode(Root->
getOpcode()),
15696 Root, LHS,
/*LHSExt=*/{ExtKind::SExt}, RHS,
15697/*RHSExt=*/{ExtKind::ZExt});
15701NodeExtensionHelper::getSupportedFoldings(
constSDNode *Root) {
15711// add|sub|fadd|fsub-> vwadd(u)|vwsub(u)|vfwadd|vfwsub 15712 Strategies.
push_back(canFoldToVWWithSameExtension);
15713// add|sub|fadd|fsub -> vwadd(u)_w|vwsub(u)_w}|vfwadd_w|vfwsub_w 15721 Strategies.
push_back(canFoldToVWWithSameExtension);
15726 Strategies.
push_back(canFoldToVWWithSameExtension);
15733 Strategies.
push_back(canFoldToVWWithZEXT);
15737// vwadd_w|vwsub_w -> vwadd|vwsub 15738 Strategies.
push_back(canFoldToVWWithSEXT);
15742// vwaddu_w|vwsubu_w -> vwaddu|vwsubu 15743 Strategies.
push_back(canFoldToVWWithZEXT);
15747// vfwadd_w|vfwsub_w -> vfwadd|vfwsub 15748 Strategies.
push_back(canFoldToVWWithFPEXT);
15755}
// End anonymous namespace.

/// Combine a binary or FMA operation to its equivalent VW or VW_W form.
/// The supported combines are:
/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
/// mul | mul_vl -> vwmul(u) | vwmul_su
/// shl | shl_vl -> vwsll
/// fadd_vl -> vfwadd | vfwadd_w
/// fsub_vl -> vfwsub | vfwsub_w
/// fmul_vl -> vfwmul
/// vwadd_w(u) -> vwadd(u)
/// vwsub_w(u) -> vwsub(u)
/// vfwadd_w -> vfwadd
/// vfwsub_w -> vfwsub
  if (!NodeExtensionHelper::isSupportedRoot(N, Subtarget))
15783 Inserted.insert(
N);
15786while (!Worklist.
empty()) {
15789 NodeExtensionHelper
LHS(Root, 0, DAG, Subtarget);
15790 NodeExtensionHelper
RHS(Root, 1, DAG, Subtarget);
15791auto AppendUsersIfNeeded = [&Worklist, &Subtarget,
15792 &Inserted](
const NodeExtensionHelper &
Op) {
15793if (
Op.needToPromoteOtherUsers()) {
15796if (!NodeExtensionHelper::isSupportedRoot(TheUser, Subtarget))
15798// We only support the first 2 operands of FMA. 15801if (Inserted.insert(TheUser).second)
15808// Control the compile time by limiting the number of node we look at in 15814 NodeExtensionHelper::getSupportedFoldings(Root);
15816assert(!FoldingStrategies.
empty() &&
"Nothing to be folded");
15817bool Matched =
false;
15818for (
int Attempt = 0;
15819 (Attempt != 1 + NodeExtensionHelper::isCommutative(Root)) && !Matched;
15822for (NodeExtensionHelper::CombineToTry FoldingStrategy :
15823 FoldingStrategies) {
15824 std::optional<CombineResult> Res =
15825 FoldingStrategy(Root,
LHS,
RHS, DAG, Subtarget);
        // All the inputs that are extended need to be folded, otherwise
        // we would be leaving the old input (since it may still be used),
        if (Res->LHSExt.has_value())
15833if (!AppendUsersIfNeeded(
LHS))
15835if (Res->RHSExt.has_value())
15836if (!AppendUsersIfNeeded(
RHS))
15843// Right now we do an all or nothing approach. 15847// Store the value for the replacement of the input node separately. 15849// We do the RAUW after we materialize all the combines, because some replaced 15850// nodes may be feeding some of the yet-to-be-replaced nodes. Put differently, 15851// some of these nodes may appear in the NodeExtensionHelpers of some of the 15852// yet-to-be-visited CombinesToApply roots. 15855for (CombineResult Res : CombinesToApply) {
15856SDValue NewValue = Res.materialize(DAG, Subtarget);
15857if (!InputRootReplacement) {
15859"First element is expected to be the current node");
15860 InputRootReplacement = NewValue;
15865for (std::pair<SDValue, SDValue> OldNewValues : ValuesToReplace) {
15869return InputRootReplacement;
// Fold (vwadd(u).wv y, (vmerge cond, x, 0)) -> vwadd(u).wv y, x, y, cond
//      (vwsub(u).wv y, (vmerge cond, x, 0)) -> vwsub(u).wv y, x, y, cond
// y will be the Passthru and cond will be the Mask.
  unsigned Opc = N->getOpcode();
15882unsigned MergeOpc = MergeOp.
getOpcode();
15892// Passthru should be undef 15893SDValue Passthru =
N->getOperand(2);
  // Mask should be all ones.
  // False value of MergeOp should be all zeros.
    Z = Z.getOperand(1);
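  // Illustrative sketch (assumption): scalar justification for the fold
  // above. Moving the vmerge into the widening op's mask relies on
  //   y + (cond ? x : 0) == (cond ? y + x : y)
  // per lane, with y reused as the passthru for the masked-off lanes.
  // The helper name is hypothetical.
  static bool mergeFoldHolds(bool Cond, int32_t X, int32_t Y) {
    int64_t Unfolded = int64_t(Y) + (Cond ? int64_t(X) : 0);
    int64_t Folded = Cond ? int64_t(Y) + int64_t(X) : int64_t(Y);
    return Unfolded == Folded;
  }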
15913 {Y, X, Y, MergeOp->getOperand(0), N->getOperand(4)},
15920 [[maybe_unused]]
unsigned Opc =
N->getOpcode();
// Helper function for performMemPairCombine.
// Try to combine the memory loads/stores LSNode1 and LSNode2
// into a single memory pair operation.
  // The new operation has twice the width.
  EVT NewMemVT = (MemVT == MVT::i32) ? MVT::i64 : MVT::i128;
15955auto Ext = cast<LoadSDNode>(LSNode1)->getExtensionType();
15957if (MemVT == MVT::i32)
15963 Opcode,
SDLoc(LSNode1), DAG.
getVTList({XLenVT, XLenVT, MVT::Other}),
// Try to combine two adjacent loads/stores to a single pair instruction from
// the XTHeadMemPair vendor extension.
  // Target does not support load/store pair.
  if (!Subtarget.hasVendorXTHeadMemPair())

  // No volatile, indexed or atomic loads/stores.

  // Function to get a base + constant representation from a memory value.
  auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> {
16012if (
auto *C1 = dyn_cast<ConstantSDNode>(
Ptr->getOperand(1)))
16013return {
Ptr->getOperand(0), C1->getZExtValue()};
16017auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->
getOperand(OpNum));
16025// No volatile, indexed or atomic loads/stores. 16029// Check if LSNode1 and LSNode2 have the same type and extension. 16038auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->
getOperand(OpNum));
  // Check if the base pointer is the same for both instructions.

  // Check if the offsets match the XTHeadMemPair encoding constraints.
  if (MemVT == MVT::i32) {
    // Check for adjacent i32 values and a 2-bit index.
    if ((Offset1 + 4 == Offset2) && isShiftedUInt<2, 3>(Offset1))
  } else if (MemVT == MVT::i64) {
    // Check for adjacent i64 values and a 2-bit index.
    if ((Offset1 + 8 == Offset2) && isShiftedUInt<2, 4>(Offset1))
// (fp_to_int (froundeven X)) -> fcvt X, rne
// (fp_to_int (ftrunc X))     -> fcvt X, rtz
// (fp_to_int (ffloor X))     -> fcvt X, rdn
// (fp_to_int (fceil X))      -> fcvt X, rup
// (fp_to_int (fround X))     -> fcvt X, rmm
// (fp_to_int (frint X))      -> fcvt X
  // Don't do this for strict-fp Src.
  if (Src->isStrictFPOpcode())
16089// Ensure the FP type is legal. 16093// Don't do this for f16 with Zfhmin and not Zfh. 16094if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
16098// If the result is invalid, we didn't find a foldable instruction. 16104EVT VT =
N->getValueType(0);
16107MVT SrcVT = Src.getSimpleValueType();
16108MVT SrcContainerVT = SrcVT;
16110SDValue XVal = Src.getOperand(0);
16112// For widening and narrowing conversions we just combine it into a 16113// VFCVT_..._VL node, as there are no specific VFWCVT/VFNCVT VL nodes. They 16114// end up getting lowered to their appropriate pseudo instructions based on 16115// their operand types 16120// Make fixed-length vectors scalable first 16133// Use the dedicated trunc static rounding mode if we're truncating so we 16134// don't need to generate calls to fsrmi/fsrm 16137 FpToInt = DAG.
getNode(Opc,
DL, ContainerVT, XVal, Mask, VL);
16141 FpToInt = DAG.
getNode(Opc,
DL, ContainerVT, XVal, Mask,
16145// If converted from fixed-length to scalable, convert back 16152// Only handle XLen or i32 types. Other types narrower than XLen will 16153// eventually be legalized to XLenVT. 16154if (VT != MVT::i32 && VT != XLenVT)
16169// (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne)) 16170// (fp_to_int_sat (ftrunc X)) -> (select X == nan, 0, (fcvt X, rtz)) 16171// (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn)) 16172// (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup)) 16173// (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm)) 16174// (fp_to_int_sat (frint X)) -> (select X == nan, 0, (fcvt X, dyn)) 16182// Only handle XLen types. Other types narrower than XLen will eventually be 16183// legalized to XLenVT. 16184EVT DstVT =
N->getValueType(0);
16185if (DstVT != XLenVT)
16190// Don't do this for strict-fp Src. 16191if (Src->isStrictFPOpcode())
16194// Ensure the FP type is also legal. 16198// Don't do this for f16 with Zfhmin and not Zfh. 16199if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
16202EVT SatVT = cast<VTSDNode>(
N->getOperand(1))->getVT();
16213elseif (DstVT == MVT::i64 && SatVT == MVT::i32)
16217// FIXME: Support other SatVTs by clamping before or after the conversion. 16219 Src = Src.getOperand(0);
16225// fcvt.wu.* sign extends bit 31 on RV64. FP_TO_UINT_SAT expects to zero 16230// RISC-V FP-to-int conversions saturate to the destination register size, but 16231// don't produce 0 for nan. 16236// Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is 16237// smaller than XLenVT. 16240assert(Subtarget.hasStdExtZbkb() &&
"Unexpected extension");
16246EVT VT =
N->getValueType(0);
16258// vp.reverse(vp.load(ADDR, MASK)) -> vp.strided.load(ADDR, -1, MASK) 16260// Check if its first operand is a vp.load. 16261auto *VPLoad = dyn_cast<VPLoadSDNode>(
N->getOperand(0));
16265EVT LoadVT = VPLoad->getValueType(0);
16266// We do not have a strided_load version for masks, and the evl of vp.reverse 16267// and vp.load should always be the same. 16269N->getOperand(2) != VPLoad->getVectorLength() ||
16270 !
N->getOperand(0).hasOneUse())
16273// Check if the mask of outer vp.reverse are all 1's. 16277SDValue LoadMask = VPLoad->getMask();
16278// If Mask is all ones, then load is unmasked and can be reversed. 16280// If the mask is not all ones, we can reverse the load if the mask was also 16281// reversed by an unmasked vp.reverse with the same EVL. 16282if (LoadMask.
getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE ||
16284 LoadMask.
getOperand(2) != VPLoad->getVectorLength())
16289// Base = LoadAddr + (NumElem - 1) * ElemWidthByte 16292SDValue NumElem = VPLoad->getVectorLength();
16293uint64_t ElemWidthByte = VPLoad->getValueType(0).getScalarSizeInBits() / 8;
16305 PtrInfo, VPLoad->getMemOperand()->getFlags(),
16309 LoadVT,
DL, VPLoad->getChain(),
Base, Stride, LoadMask,
16310 VPLoad->getVectorLength(), MMO, VPLoad->isExpandingLoad());
16320// vp.store(vp.reverse(VAL), ADDR, MASK) -> vp.strided.store(VAL, NEW_ADDR, 16322auto *VPStore = cast<VPStoreSDNode>(
N);
16324if (VPStore->getValue().getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE)
16327SDValue VPReverse = VPStore->getValue();
16330// We do not have a strided_store version for masks, and the evl of vp.reverse 16331// and vp.store should always be the same. 16333 VPStore->getVectorLength() != VPReverse.
getOperand(2) ||
16337SDValue StoreMask = VPStore->getMask();
16338// If Mask is all ones, then load is unmasked and can be reversed. 16340// If the mask is not all ones, we can reverse the store if the mask was 16341// also reversed by an unmasked vp.reverse with the same EVL. 16342if (StoreMask.
getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE ||
16344 StoreMask.
getOperand(2) != VPStore->getVectorLength())
16349// Base = StoreAddr + (NumElem - 1) * ElemWidthByte 16352SDValue NumElem = VPStore->getVectorLength();
16366 PtrInfo, VPStore->getMemOperand()->getFlags(),
16371 VPStore->getOffset(), Stride, StoreMask, VPStore->getVectorLength(),
16372 VPStore->getMemoryVT(), MMO, VPStore->getAddressingMode(),
16373 VPStore->isTruncatingStore(), VPStore->isCompressingStore());
16376// Convert from one FMA opcode to another based on whether we are negating the 16377// multiply result and/or the accumulator. 16378// NOTE: Only supports RVV operations with VL. 16380// Negating the multiply result changes ADD<->SUB and toggles 'N'. 16397// Negating the accumulator changes ADD<->SUB. 16418// Fold FNEG_VL into FMA opcodes. 16419// The first operand of strict-fp is chain. 16422unsignedOffset = IsStrict ? 1 : 0;
16429auto invertIfNegative = [&Mask, &VL](
SDValue &V) {
16431 V.getOperand(2) == VL) {
16432// Return the negated input. 16433 V = V.getOperand(0);
16440bool NegA = invertIfNegative(
A);
16441bool NegB = invertIfNegative(
B);
16442bool NegC = invertIfNegative(
C);
16444// If no operands are negated, we're done. 16445if (!NegA && !NegB && !NegC)
16451 {N->getOperand(0), A, B, C, Mask, VL});
16464// FIXME: Ignore strict opcodes for now. 16475EVT VT =
N->getValueType(0);
16480if (!isa<ConstantSDNode>(
N->getOperand(1)))
16482uint64_t ShAmt =
N->getConstantOperandVal(1);
16486// Combine (sra (sext_inreg (shl X, C1), iX), C2) -> 16487// (sra (shl X, C1+(XLen-iX)), C2+(XLen-iX)) so it gets selected as SLLI+SRAI. 16490 cast<VTSDNode>(N0.
getOperand(1))->getVT().getSizeInBits();
16495if (LShAmt < ExtSize) {
16508if (ShAmt > 32 || VT != MVT::i64)
16511// Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C) 16512// FIXME: Should this be a generic combine? There's a similar combine on X86. 16514// Also try these folds where an add or sub is in the middle. 16515// (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1), C) 16516// (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X), C) 16520// We might have an ADD or SUB between the SRA and SHL. 16523// Other operand needs to be a constant we can modify. 16524 AddC = dyn_cast<ConstantSDNode>(N0.
getOperand(IsAdd ? 1 : 0));
16528// AddC needs to have at least 32 trailing zeros. 16532// All users should be a shift by constant less than or equal to 32. This 16533// ensures we'll do this optimization for each of them to produce an 16534// add/sub+sext_inreg they can all share. 16537 !isa<ConstantSDNode>(U->getOperand(1)) ||
16538 U->getConstantOperandVal(1) > 32)
  // Not an ADD or SUB.
  // Look for a shift left by 32.
  // If we didn't look through an add/sub, then the shl should have one use.
  // If we did look through an add/sub, the sext_inreg we create is free so
  // we're only creating 2 new instructions. It's enough to only remove the
  // original sra+add/sub.
  // If we looked through an ADD or SUB, we need to rebuild it with the shifted
// Invert (and/or (setcc X, Y), (xor Z, 1)) to (or/and (setcc !cc X, Y), Z) if
// the result is used as the condition of a br_cc or select_cc we can invert,
// inverting the setcc is free, and Z is 0/1. Caller will invert the
  if (!
Cond.hasOneUse())
  // Canonicalize setcc to LHS.
  // LHS should be a setcc and RHS should be an xor.
  // If the condition is an And, SimplifyDemandedBits may have changed
  // (xor Z, 1) to (not Z).
  // The LHS of the xor needs to be 0/1.
  // We can only invert integer setccs.
  // Invert (setlt 0, X) by converting to (setlt X, 1).
  // (setlt X, 1) by converting to (setlt 0, X).

// Perform common combines for BR_CC and SELECT_CC conditions.
  // Since an arithmetic right shift always preserves the sign,
  // the shift can be omitted:
  // Fold setlt (sra X, N), 0 -> setlt X, 0 and
  //      setge (sra X, N), 0 -> setge X, 0
  // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
  // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
      LHS.getOperand(0).getValueType() == Subtarget.
getXLenVT()) {
16668// If we're looking for eq 0 instead of ne 0, we need to invert the 16671 CCVal = cast<CondCodeSDNode>(
LHS.getOperand(2))->get();
16683// Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne) 16690// Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt) 16702 ShAmt =
LHS.getValueSizeInBits() - 1 - ShAmt;
  // (X, 1, setne) -> // (X, 0, seteq) if we can prove X is 0/1.
  // This can occur when legalizing some floating point comparisons.

// (select C, (add Y, X), Y) -> (add Y, (select C, X, 0)).
// (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)).
// (select C, (or Y, X), Y) -> (or Y, (select C, X, 0)).
// (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)).
  bool Commutative = true;
  unsigned Opc = TrueVal.getOpcode();
    Commutative = false;
  if (!TrueVal.hasOneUse() || isa<ConstantSDNode>(FalseVal))
  if (FalseVal == TrueVal.getOperand(0))
  else if (Commutative && FalseVal == TrueVal.getOperand(1))
16771EVT VT =
N->getValueType(0);
16773SDValue OtherOp = TrueVal.getOperand(1 - OpToFold);
16779assert(IdentityOperand &&
"No identity operand!");
16784 DAG.
getSelect(
DL, OtherOpVT,
N->getOperand(0), OtherOp, IdentityOperand);
16785return DAG.
getNode(TrueVal.getOpcode(),
DL, VT, FalseVal, NewSel);
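// Illustrative sketch (assumption): scalar form of the rewrite above. Folding
// the select into the binop's RHS is valid because the identity operand (0
// for add here) makes the "false" arm a no-op:
//   (C ? (Y + X) : Y) == Y + (C ? X : 0)
// The helper name is hypothetical.
static bool selectIntoOpHolds(bool C, int32_t X, int32_t Y) {
  int64_t Orig = C ? int64_t(Y) + int64_t(X) : int64_t(Y);
  int64_t Folded = int64_t(Y) + (C ? int64_t(X) : 0);
  return Orig == Folded;
}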
16788// This tries to get rid of `select` and `icmp` that are being used to handle 16789// `Targets` that do not support `cttz(0)`/`ctlz(0)`. 16793// This represents either CTTZ or CTLZ instruction. 16806 CountZeroes =
N->getOperand(2);
16807 ValOnZero =
N->getOperand(1);
16809 CountZeroes =
N->getOperand(1);
16810 ValOnZero =
N->getOperand(2);
16829if (
Cond->getOperand(0) != CountZeroesArgument)
16845 CountZeroes, BitWidthMinusOne);
16855EVT VT =
N->getValueType(0);
16856EVT CondVT =
Cond.getValueType();
16861// Replace (setcc eq (and x, C)) with (setcc ne (and x, C))) to generate 16862// BEXTI, where C is power of 2. 16864 (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())) {
16870constAPInt &MaskVal =
LHS.getConstantOperandAPInt(1);
16881if (!TrueVal.hasOneUse() || !FalseVal.hasOneUse())
16895// Add is commutative, so check both orders 16896return ((TrueVal.getOperand(0) ==
A && TrueVal.getOperand(1) ==
B) ||
16897 (TrueVal.getOperand(1) ==
A && TrueVal.getOperand(0) ==
B));
16900/// Convert vselect CC, (add a, b), (sub a, b) to add a, (vselect CC, -b, b). 16901/// This allows us match a vadd.vv fed by a masked vrsub, which reduces 16902/// register pressure over the add followed by masked vsub sequence. 16905EVT VT =
N->getValueType(0);
16908SDValue FalseVal =
N->getOperand(2);
16914SDValue Sub = SwapCC ? TrueVal : FalseVal;
16918// Arrange the select such that we can match a masked 16919// vrsub.vi to perform the conditional negate 16939SDValue FalseVal =
N->getOperand(2);
/// If we have a build_vector where each lane is binop X, C, where C
/// is a constant (but not necessarily the same constant on all lanes),
/// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ...).
/// We assume that materializing a constant build vector will be no more
/// expensive than performing O(n) binops.
  EVT VT =
N->getValueType(0);
16961constunsigned Opcode =
N->op_begin()->getNode()->getOpcode();
16968// This BUILD_VECTOR involves an implicit truncation, and sinking 16969// truncates through binops is non-trivial. 16977// We can't form a divide or remainder from undef. 16986// TODO: We can handle operations which have an neutral rhs value 16987// (e.g. x + 0, a * 1 or a << 0), but we then have to keep track 16988// of profit in a more explicit manner. 16989if (
Op.getOpcode() != Opcode || !
Op.hasOneUse())
16993if (!isa<ConstantSDNode>(
Op.getOperand(1)) &&
16994 !isa<ConstantFPSDNode>(
Op.getOperand(1)))
16996// FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may 16997// have different LHS and RHS types. 16998if (
Op.getOperand(0).getValueType() !=
Op.getOperand(1).getValueType())
17023// Given insert_vector_elt (binop a, VecC), (same_binop b, C2), Elt 17024// move the insert_vector_elts into the arms of the binop. Note that 17025// the new RHS must be a constant. 17026constunsigned InVecOpcode = InVec->
getOpcode();
17036if (!isa<ConstantSDNode>(InValRHS) && !isa<ConstantFPSDNode>(InValRHS))
17038// FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may 17039// have different LHS and RHS types. 17043 InVecLHS, InValLHS, EltNo);
17045 InVecRHS, InValRHS, EltNo);
17049// Given insert_vector_elt (concat_vectors ...), InVal, Elt 17050// move the insert_vector_elt to the source operand of the concat_vector. 17054auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
17057unsigned Elt = IndexC->getZExtValue();
17065unsigned ConcatOpIdx = Elt / ConcatNumElts;
17068 ConcatOp, InVal, NewIdx);
17072 ConcatOps[ConcatOpIdx] = ConcatOp;
17076// If we're concatenating a series of vector loads like 17077// concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ... 17078// Then we can turn this into a strided load by widening the vector elements 17079// vlse32 p, stride=n 17084EVT VT =
N->getValueType(0);
17086// Only perform this combine on legal MVTs. 17090// TODO: Potentially extend this to scalable vectors 17094auto *BaseLd = dyn_cast<LoadSDNode>(
N->getOperand(0));
17096 !
SDValue(BaseLd, 0).hasOneUse())
17099EVT BaseLdVT = BaseLd->getValueType(0);
17101// Go through the loads and check that they're strided 17106auto *Ld = dyn_cast<LoadSDNode>(
Op);
17107if (!Ld || !Ld->isSimple() || !
Op.hasOneUse() ||
17109 Ld->getValueType(0) != BaseLdVT)
17114// The common alignment is the most restrictive (smallest) of all the loads 17118usingPtrDiff = std::pair<std::variant<int64_t, SDValue>,
bool>;
17121// If the load ptrs can be decomposed into a common (Base + Index) with a 17122// common constant stride, then return the constant stride. 17125if (BIO1.equalBaseIndex(BIO2, DAG))
17128// Otherwise try to match (add LastPtr, Stride) or (add NextPtr, Stride) 17130SDValue P2 = Ld2->getBasePtr();
17133if (P1.getOpcode() ==
ISD::ADD && P1.getOperand(0) == P2)
17134return {{P1.getOperand(1),
true}};
17136return std::nullopt;
17139// Get the distance between the first and second loads 17140auto BaseDiff = GetPtrDiff(Lds[0], Lds[1]);
17144// Check all the loads are the same distance apart 17145for (
auto *It = Lds.
begin() + 1; It != Lds.
end() - 1; It++)
17146if (GetPtrDiff(*It, *std::next(It)) != BaseDiff)
17149// TODO: At this point, we've successfully matched a generalized gather 17150// load. Maybe we should emit that, and then move the specialized 17151// matchers above and below into a DAG combine? 17153// Get the widened scalar type, e.g. v4i8 -> i64 17154unsigned WideScalarBitWidth =
17158// Get the vector type for the strided load, e.g. 4 x v4i8 -> v4i64 17163// Check that the operation is legal 17167auto [StrideVariant, MustNegateStride] = *BaseDiff;
17169 std::holds_alternative<SDValue>(StrideVariant)
17170 ? std::get<SDValue>(StrideVariant)
17173if (MustNegateStride)
17181if (
auto *ConstStride = dyn_cast<ConstantSDNode>(Stride);
17182 ConstStride && ConstStride->getSExtValue() >= 0)
17183// total size = (elsize * n) + (stride - elsize) * (n-1) 17184// = elsize + stride * (n-1) 17186 ConstStride->getSExtValue() * (
N->getNumOperands() - 1);
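  // Illustrative sketch (assumption): checks the algebra in the comment
  // above,
  //   (elsize * n) + (stride - elsize) * (n - 1) == elsize + stride * (n - 1).
  // The helper name is hypothetical.
  static bool memSizeFormulaHolds(int64_t ElSize, int64_t Stride, int64_t N) {
    int64_t Expanded = ElSize * N + (Stride - ElSize) * (N - 1);
    int64_t Simplified = ElSize + Stride * (N - 1);
    return Expanded == Simplified;
  }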
17188// If Stride isn't constant, then we can't know how much it will load 17192 BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(), MemSize,
17196 WideVecVT,
DL, BaseLd->getChain(), BaseLd->getBasePtr(), Stride,
17210EVT VT =
N->getValueType(0);
17218// Recognized a disguised select of add/sub. 17222SDValue Sub = SwapCC ? V1 : V2;
17227for (
int MaskIndex : Mask) {
17228bool SelectMaskVal = (MaskIndex < (int)NumElts);
17231assert(MaskVals.
size() == NumElts &&
"Unexpected select-like shuffle");
17235// Arrange the select such that we can match a masked 17236// vrsub.vi to perform the conditional negate 17244// Custom legalize <N x i128> or <N x i256> to <M x ELEN>. This runs 17245// during the combine phase before type legalization, and relies on 17246// DAGCombine not undoing the transform if isShuffleMaskLegal returns false 17247// for the source mask. 17269if (
N->getValueType(0).isFixedLengthVector())
17276SDValue AddPassthruOp =
N->getOperand(2);
17281auto IsVWMulOpc = [](
unsigned Opc) {
17310return std::make_pair(
N->getOperand(3),
N->getOperand(4));
17311 }(
N, DAG, Subtarget);
17316if (AddMask != MulMask || AddVL != MulVL)
17321"Unexpected opcode after VWMACC_VL");
17323"Unexpected opcode after VWMACC_VL!");
17325"Unexpected opcode after VWMUL_VL!");
17327"Unexpected opcode after VWMUL_VL!");
17330EVT VT =
N->getValueType(0);
17346constEVT IndexVT = Index.getValueType();
17348// RISC-V indexed loads only support the "unsigned unscaled" addressing 17349// mode, so anything else must be manually legalized. 17350if (!isIndexTypeSigned(IndexType))
17354// Any index legalization should first promote to XLenVT, so we don't lose 17355// bits when scaling. This may create an illegal index type so we let 17356// LLVM's legalization take care of the splitting. 17357// FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet. 17365/// Match the index vector of a scatter or gather node as the shuffle mask 17366/// which performs the rearrangement if possible. Will only match if 17367/// all lanes are touched, and thus replacing the scatter or gather with 17368/// a unit strided access and shuffle is legal. 17379// Create the shuffle mask and check all bits active 17382for (
unsigned i = 0; i < Index->getNumOperands(); i++) {
17383// TODO: We've found an active bit of UB, and could be 17384// more aggressive here if desired. 17385if (Index->getOperand(i)->isUndef())
17387uint64_tC = Index->getConstantOperandVal(i);
17388if (
C % ElementSize != 0)
17394 ActiveLanes.
set(
C);
17396return ActiveLanes.
all();
17399/// Match the index of a gather or scatter operation as an operation 17400/// with twice the element width and half the number of elements. This is 17401/// generally profitable (if legal) because these operations are linear 17402/// in VL, so even if we cause some extract VTYPE/VL toggles, we still 17411// Attempt a doubling. If we can use a element type 4x or 8x in 17412// size, this will happen via multiply iterations of the transform. 17414if (NumElems % 2 != 0)
17418constunsigned WiderElementSize = ElementSize * 2;
17419if (WiderElementSize > ST.getELen()/8)
17422if (!ST.enableUnalignedVectorMem() && BaseAlign < WiderElementSize)
17425for (
unsigned i = 0; i < Index->getNumOperands(); i++) {
17426// TODO: We've found an active bit of UB, and could be 17427// more aggressive here if desired. 17428if (Index->getOperand(i)->isUndef())
17430// TODO: This offset check is too strict if we support fully 17431// misaligned memory operations. 17432uint64_tC = Index->getConstantOperandVal(i);
17434if (
C % WiderElementSize != 0)
17439if (
C !=
Last + ElementSize)
// trunc (sra sext (X), zext (Y)) -> sra (X, smin (Y, scalarsize(Y) - 1))
// This benefits the cases where X and Y are both low-precision vectors of the
// same value type. Since the truncate would be lowered into
// n levels of TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncate
// restriction, such a pattern would be expanded into a series of "vsetvli"
// and "vnsrl" instructions later to reach this point.
      (isa<RegisterSDNode>(VL) &&
17457 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0);
17459 Mask.getOperand(0) != VL)
17462auto IsTruncNode = [&](
SDValue V) {
17464 V.getOperand(1) == Mask && V.getOperand(2) == VL;
17469// We need to first find the inner level of TRUNCATE_VECTOR_VL node 17470// to distinguish such pattern. 17471while (IsTruncNode(
Op)) {
17472if (!
Op.hasOneUse())
17474Op =
Op.getOperand(0);
// Combine (truncate_vector_vl (umin X, C)) -> (vnclipu_vl X) if C is the
// maximum value for the truncated type.
// Combine (truncate_vector_vl (smin (smax X, C2), C1)) -> (vnclip_vl X) if C1
// is the signed maximum value for the truncated type and C2 is the signed
// minimum value for the truncated type.
  MVT VT =
N->getSimpleValueType(0);
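  // Illustrative sketch (assumption): scalar model of the saturation patterns
  // matched below by DetectUSatPattern / DetectSSatPattern, for a 16-bit to
  // 8-bit truncate. Assumes std::min/std::max are available through the
  // existing includes; the helper names are hypothetical.
  static uint8_t modelUSatTrunc(uint16_t X) {
    return uint8_t(std::min<uint16_t>(X, 255)); // umin with UNSIGNED_MAX, then truncate
  }
  static int8_t modelSSatTrunc(int16_t X) {
    int16_t Clamped = std::min<int16_t>(std::max<int16_t>(X, -128), 127);
    return int8_t(Clamped); // smax/smin with SIGNED_MIN/MAX, then truncate
  }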
17514auto MatchMinMax = [&VL, &Mask](
SDValue V,
unsigned Opc,
unsigned OpcVL,
17516if (V.getOpcode() != Opc &&
17517 !(V.getOpcode() == OpcVL && V.getOperand(2).isUndef() &&
17518 V.getOperand(3) == Mask && V.getOperand(4) == VL))
17523// Peek through conversion between fixed and scalable vectors. 17526Op.getOperand(1).getValueType().isFixedLengthVector() &&
17528Op.getOperand(1).getOperand(0).getValueType() ==
Op.getValueType() &&
17530Op =
Op.getOperand(1).getOperand(0);
17533return V.getOperand(0);
17536Op.getOperand(2) == VL) {
17537if (
auto *Op1 = dyn_cast<ConstantSDNode>(
Op.getOperand(1))) {
17539 Op1->getAPIntValue().sextOrTrunc(
Op.getScalarValueSizeInBits());
17540return V.getOperand(0);
17549auto DetectUSatPattern = [&](
SDValue V) {
17552// Simple case, V is a UMIN. 17557// If we have an SMAX that removes negative numbers first, then we can match 17558// SMIN instead of UMIN. 17565// If we have an SMIN before an SMAX and the SMAX constant is less than or 17566// equal to the SMIN constant, we can use vnclipu if we insert a new SMAX 17574 V.getOperand(1), DAG.
getUNDEF(V.getValueType()),
17580auto DetectSSatPattern = [&](
SDValue V) {
17582unsigned NumSrcBits = V.getScalarValueSizeInBits();
17590if (HiC == SignedMax && LoC == SignedMin)
17596if (HiC == SignedMax && LoC == SignedMin)
17604// Look through multiple layers of truncates. 17606 Src.getOperand(1) == Mask && Src.getOperand(2) == VL &&
17608 Src = Src.getOperand(0);
17612if ((Val = DetectUSatPattern(Src)))
17614elseif ((Val = DetectSSatPattern(Src)))
17624 Val = DAG.
getNode(ClipOpc,
DL, ValVT, Val, Mask, VL);
17625 }
while (ValVT != VT);
17631// (iX ctpop (bitcast (vXi1 A))) 17633// (zext (vcpop.m (nxvYi1 (insert_subvec (vXi1 A))))) 17634// FIXME: It's complicated to match all the variations of this after type 17635// legalization so we only handle the pre-type legalization pattern, but that 17636// requires the fixed vector type to be legal. 17639EVT VT =
N->getValueType(0);
17645// Peek through zero_extend. It doesn't change the count. 17647 Src = Src.getOperand(0);
17652 Src = Src.getOperand(0);
17653EVT SrcEVT = Src.getValueType();
17658// Make sure the input is an i1 vector. 17682// Helper to call SimplifyDemandedBits on an operand of N where only some low 17683// bits are demanded. N will be added to the Worklist if it was not deleted. 17684// Caller should return SDValue(N, 0) if this returns true. 17685auto SimplifyDemandedLowBitsHelper = [&](
unsigned OpNo,
unsigned LowBits) {
17696switch (
N->getOpcode()) {
17701// If the input to SplitF64 is just BuildPairF64 then the operation is 17702// redundant. Instead, use BuildPairF64's operands directly. 17712// It's cheaper to materialise two 32-bit integers than to load a double 17713// from the constant pool and transfer it to integer registers through the 17716APInt V =
C->getValueAPF().bitcastToAPInt();
17722// This is a target-specific version of a DAGCombine performed in 17723// DAGCombiner::visitBITCAST. It performs the equivalent of: 17724// fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) 17725// fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) 17750// Only the lower 32 bits of LHS and lower 5 bits of RHS are read. 17751if (SimplifyDemandedLowBitsHelper(0, 32) ||
17752 SimplifyDemandedLowBitsHelper(1, 5))
17759// Only the lower 32 bits of the first operand are read 17760if (SimplifyDemandedLowBitsHelper(0, 32))
17765// If the input to FMV_W_X_RV64 is just FMV_X_ANYEXTW_RV64 the the 17766// conversion is unnecessary and can be replaced with the 17767// FMV_X_ANYEXTW_RV64 operand. 17777MVT VT =
N->getSimpleValueType(0);
17780if (
auto *CFP = dyn_cast<ConstantFPSDNode>(Op0)) {
17785// If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the 17786// conversion is unnecessary and can be replaced with the FMV_W_X_RV64 17787// operand. Similar for FMV_X_ANYEXTH and FMV_H_X. 17793"Unexpected value type!");
17798 cast<LoadSDNode>(Op0)->isSimple()) {
17800auto *LN0 = cast<LoadSDNode>(Op0);
17803 LN0->getBasePtr(), IVT, LN0->getMemOperand());
17808// This is a target-specific version of a DAGCombine performed in 17809// DAGCombiner::visitBITCAST. It performs the equivalent of: 17810// fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) 17811// fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) 17827EVT VT =
N->getValueType(0);
17829// abs (sext) -> zext (abs) 17830// abs (zext) -> zext (handled elsewhere) 17872// fmul X, (copysign 1.0, Y) -> fsgnjx X, Y 17880if (!
C || !
C->getValueAPF().isExactlyValue(+1.0))
17882EVT VT =
N->getValueType(0);
17908// Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during 17909// type legalization. This is safe because fp_to_uint produces poison if 17911if (
N->getValueType(0) == MVT::i64 && Subtarget.
is64Bit()) {
17916 Src.getOperand(0));
17921 Src.getOperand(0), Src.getOperand(1));
17925returnSDValue(
N, 0);
// Return N so it doesn't get rechecked. 17944unsigned Opc =
N->getOpcode();
17946// czero_eqz x, x -> x 17953// czero_eqz X, (xor Y, 1) -> czero_nez X, Y if Y is 0 or 1. 17954// czero_nez X, (xor Y, 1) -> czero_eqz X, Y if Y is 0 or 1. 17959return DAG.
getNode(InvOpc,
SDLoc(
N),
N->getValueType(0), Val, NewCond);
17961// czero_eqz x, (setcc y, 0, ne) -> czero_eqz x, y 17962// czero_nez x, (setcc y, 0, ne) -> czero_nez x, y 17963// czero_eqz x, (setcc y, 0, eq) -> czero_nez x, y 17964// czero_nez x, (setcc y, 0, eq) -> czero_eqz x, y 17969N->getValueType(0), Val,
Cond.getOperand(0));
17982EVT VT =
N->getValueType(0);
17984// If the True and False values are the same, we don't need a select_cc. 17985if (TrueV == FalseV)
17988// (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z 17989// (select (x >= 0), y, z) -> x >> (XLEN - 1) & (z - y) + y 17990if (!Subtarget.hasShortForwardBranchOpt() && isa<ConstantSDNode>(TrueV) &&
17996 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
17997 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
17998// Only handle simm12, if it is not in this range, it can be considered as 18000if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
18001 isInt<12>(TrueSImm - FalseSImm)) {
18017 {LHS, RHS, CC, TrueV, FalseV});
18020// (select c, -1, y) -> -c | y 18026// (select c, y, -1) -> -!c | y 18034// (select c, 0, y) -> -!c & y 18041// (select c, y, 0) -> -c & y 18047// (riscvisd::select_cc x, 0, ne, x, 1) -> (add x, (setcc x, 0, eq)) 18048// (riscvisd::select_cc x, 0, eq, 1, x) -> (add x, (setcc x, 0, eq)) 18054// freeze it to be safe. 18061// If both true/false are an xor with 1, pull through the select. 18062// This can occur after op legalization if both operands are setccs that 18063// require an xor to invert. 18064// FIXME: Generalize to other binary ops with identical operand? 18084N->getOperand(0),
LHS,
RHS,
CC,
N->getOperand(4));
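// Illustrative sketch (assumption): with c restricted to 0 or 1, -c is either
// 0 or all-ones, which is what the select-to-bitwise rewrites noted above
// rely on. The helper name is hypothetical.
static bool selectToMaskHolds(bool C, uint64_t Y) {
  uint64_t NegC = uint64_t(0) - uint64_t(C);     // 0 or ~0
  uint64_t NegNotC = uint64_t(0) - uint64_t(!C); // ~0 or 0
  bool Case1 = (C ? ~uint64_t(0) : Y) == (NegC | Y);   // (select c, -1, y) -> -c | y
  bool Case2 = (C ? Y : uint64_t(0)) == (NegC & Y);    // (select c, y, 0) -> -c & y
  bool Case3 = (C ? uint64_t(0) : Y) == (NegNotC & Y); // (select c, 0, y) -> -!c & y
  return Case1 && Case2 && Case3;
}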
18097EVT VT =
N->getValueType(0);
18100// There is a form of VFSGNJ which injects the negated sign of its second 18101// operand. Try and bubble any FNEG up after the extend/round to produce 18102// this optimized pattern. Avoid modifying cases where FP_ROUND and 18105// Avoid cases where the extend/round has multiple uses, as duplicating 18106// those is typically more expensive than removing a fneg. 18121constauto *MGN = cast<MaskedGatherSDNode>(
N);
18122constEVT VT =
N->getValueType(0);
18123SDValue Index = MGN->getIndex();
18124SDValue ScaleOp = MGN->getScale();
18126assert(!MGN->isIndexScaled() &&
18127"Scaled gather/scatter should not be formed");
18132N->getVTList(), MGN->getMemoryVT(),
DL,
18133 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
18134 MGN->getBasePtr(), Index, ScaleOp},
18135 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
18139N->getVTList(), MGN->getMemoryVT(),
DL,
18140 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
18141 MGN->getBasePtr(), Index, ScaleOp},
18142 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
18146// The sequence will be XLenVT, not the type of Index. Tell 18147// isSimpleVIDSequence this so we avoid overflow. 18148if (std::optional<VIDSequence> SimpleVID =
18150 SimpleVID && SimpleVID->StepDenominator == 1) {
18151const int64_t StepNumerator = SimpleVID->StepNumerator;
18152const int64_t Addend = SimpleVID->Addend;
      const int64_t Addend = SimpleVID->Addend;
      // Note: We don't need to check alignment here since (by assumption
      // from the existence of the gather), our offsets must be sufficiently
      // aligned.
      assert(MGN->getBasePtr()->getValueType(0) == PtrVT);
18167 VT,
DL, MGN->getChain(), BasePtr,
18169 EVL, MGN->getMemOperand());
18171 StridedLoad, MGN->getPassThru(), EVL);
18181 MGN->getBasePtr(), DAG.
getUNDEF(XLenVT),
18183 MGN->getMemoryVT(), MGN->getMemOperand(),
18192 MGN->getMemOperand()->getBaseAlign(), Subtarget)) {
18194for (
unsigned i = 0; i < Index->getNumOperands(); i += 2)
18195 NewIndices.
push_back(Index.getOperand(i));
18196EVT IndexVT = Index.getValueType()
18197 .getHalfNumVectorElementsVT(*DAG.
getContext());
18203assert(EltCnt.isKnownEven() &&
"Splitting vector, but not in half!");
18205 EltCnt.divideCoefficientBy(2));
18208 EltCnt.divideCoefficientBy(2));
18213 {MGN->getChain(), Passthru, Mask, MGN->getBasePtr(),
18222constauto *MSN = cast<MaskedScatterSDNode>(
N);
18223SDValue Index = MSN->getIndex();
18224SDValue ScaleOp = MSN->getScale();
18226assert(!MSN->isIndexScaled() &&
18227"Scaled gather/scatter should not be formed");
18232N->getVTList(), MSN->getMemoryVT(),
DL,
18233 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
18235 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
18239N->getVTList(), MSN->getMemoryVT(),
DL,
18240 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
18242 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
18244EVT VT = MSN->getValue()->getValueType(0);
18246if (!MSN->isTruncatingStore() &&
18251 DAG.
getUNDEF(XLenVT), MSN->getMask(),
18252 MSN->getMemoryVT(), MSN->getMemOperand(),
18257case ISD::VP_GATHER: {
18258constauto *VPGN = cast<VPGatherSDNode>(
N);
18259SDValue Index = VPGN->getIndex();
18260SDValue ScaleOp = VPGN->getScale();
18262assert(!VPGN->isIndexScaled() &&
18263"Scaled gather/scatter should not be formed");
18268 {VPGN->getChain(), VPGN->getBasePtr(), Index,
18269 ScaleOp, VPGN->getMask(),
18270 VPGN->getVectorLength()},
18271 VPGN->getMemOperand(), IndexType);
18275 {VPGN->getChain(), VPGN->getBasePtr(), Index,
18276 ScaleOp, VPGN->getMask(),
18277 VPGN->getVectorLength()},
18278 VPGN->getMemOperand(), IndexType);
18282case ISD::VP_SCATTER: {
18283constauto *VPSN = cast<VPScatterSDNode>(
N);
18284SDValue Index = VPSN->getIndex();
18285SDValue ScaleOp = VPSN->getScale();
18287assert(!VPSN->isIndexScaled() &&
18288"Scaled gather/scatter should not be formed");
18293 {VPSN->getChain(), VPSN->getValue(),
18294 VPSN->getBasePtr(), Index, ScaleOp,
18295 VPSN->getMask(), VPSN->getVectorLength()},
18296 VPSN->getMemOperand(), IndexType);
18300 {VPSN->getChain(), VPSN->getValue(),
18301 VPSN->getBasePtr(), Index, ScaleOp,
18302 VPSN->getMask(), VPSN->getVectorLength()},
18303 VPSN->getMemOperand(), IndexType);
18314// We don't need the upper 32 bits of a 64-bit element for a shift amount. 18317EVT VT =
N->getValueType(0);
18320return DAG.
getNode(
N->getOpcode(),
DL, VT,
N->getOperand(0), ShAmt,
18321N->getOperand(2),
N->getOperand(3),
N->getOperand(4));
18337// We don't need the upper 32 bits of a 64-bit element for a shift amount. 18339EVT VT =
N->getValueType(0);
18343return DAG.
getNode(
N->getOpcode(),
DL, VT,
N->getOperand(0), ShAmt);
18383auto *Store = cast<StoreSDNode>(
N);
18384SDValue Chain = Store->getChain();
18385EVT MemVT = Store->getMemoryVT();
18386SDValue Val = Store->getValue();
18389bool IsScalarizable =
18391 Store->isSimple() &&
18396// If sufficiently aligned we can scalarize stores of constant vectors of 18397// any power-of-two size up to XLen bits, provided that they aren't too 18398// expensive to materialize. 18399// vsetivli zero, 2, e8, m1, ta, ma 18407// Get the constant vector bits 18421 NewVT, *Store->getMemOperand())) {
18423return DAG.
getStore(Chain,
DL, NewV, Store->getBasePtr(),
18424 Store->getPointerInfo(), Store->getOriginalAlign(),
18425 Store->getMemOperand()->getFlags());
18429// Similarly, if sufficiently aligned we can scalarize vector copies, e.g. 18430// vsetivli zero, 2, e16, m1, ta, ma 18433if (
auto *L = dyn_cast<LoadSDNode>(Val);
18435 L->hasNUsesOfValue(1, 0) && L->hasNUsesOfValue(1, 1) &&
18437 L->getMemoryVT() == MemVT) {
18440 NewVT, *Store->getMemOperand()) &&
18442 NewVT, *L->getMemOperand())) {
18444 L->getPointerInfo(), L->getOriginalAlign(),
18445 L->getMemOperand()->getFlags());
18446return DAG.
getStore(Chain,
DL, NewL, Store->getBasePtr(),
18447 Store->getPointerInfo(), Store->getOriginalAlign(),
18448 Store->getMemOperand()->getFlags());
18452// Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1. 18453// vfmv.f.s is represented as extract element from 0. Match it late to avoid 18454// any illegal types. 18460MVT VecVT = Src.getSimpleValueType();
18461// VecVT should be scalable and memory VT should match the element type. 18467 Store->getChain(),
DL, Src, Store->getBasePtr(), Store->getOffset(),
18470 Store->getMemOperand(), Store->getAddressingMode(),
18471 Store->isTruncatingStore(),
/*IsCompress*/false);
18478EVT VT =
N->getValueType(0);
18479// Only perform this combine on legal MVT types. 18504constMVT VT =
N->getSimpleValueType(0);
18505SDValue Passthru =
N->getOperand(0);
18509// If VL is 1, we can use vfmv.s.f. 18515constMVT VT =
N->getSimpleValueType(0);
18516SDValue Passthru =
N->getOperand(0);
18520// Tail agnostic VMV.V.X only demands the vector element bitwidth from the 18522unsigned ScalarSize = Scalar.getValueSizeInBits();
18524if (ScalarSize > EltWidth && Passthru.
isUndef())
18525if (SimplifyDemandedLowBitsHelper(1, EltWidth))
18528// If VL is 1 and the scalar value won't benefit from immediate, we can 18532 (!Const || Const->isZero() ||
18533 !Const->getAPIntValue().sextOrTrunc(EltWidth).isSignedIntN(5)))
18540// Try to remove vector->scalar->vector if the scalar->vector is inserting 18541// into an undef vector. 18542// TODO: Could use a vslide or vmv.v.v for non-undef. 18543if (
N->getOperand(0).isUndef() &&
18546 Src.getOperand(0).getValueType().isScalableVector()) {
18547EVT VT =
N->getValueType(0);
18548EVT SrcVT = Src.getOperand(0).getValueType();
18550// Widths match, just return the original vector. 18552return Src.getOperand(0);
18553// TODO: Use insert_subvector/extract_subvector to change widen/narrow? 18558constMVT VT =
N->getSimpleValueType(0);
18559SDValue Passthru =
N->getOperand(0);
18564 Scalar.getOperand(0).getValueType() ==
N->getValueType(0))
18565return Scalar.getOperand(0);
18567// Use M1 or smaller to avoid over constraining register allocation 18574 DAG.
getNode(
N->getOpcode(),
DL, M1VT, M1Passthru, Scalar, VL);
18580// We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or 18581// higher would involve overly constraining the register allocator for 18584 Const && !Const->isZero() && isInt<5>(Const->getSExtValue()) &&
18592MVT VecVT =
N->getOperand(0).getSimpleValueType();
18594if (M1VT.
bitsLT(VecVT)) {
18605unsigned IntNo =
N->getConstantOperandVal(IntOpNo);
18607// By default we do not combine any intrinsic. 18610case Intrinsic::riscv_vcpop:
18611case Intrinsic::riscv_vcpop_mask:
18612case Intrinsic::riscv_vfirst:
18613case Intrinsic::riscv_vfirst_mask: {
18615if (IntNo == Intrinsic::riscv_vcpop_mask ||
18616 IntNo == Intrinsic::riscv_vfirst_mask)
18617 VL =
N->getOperand(3);
18620// If VL is 0, vcpop -> li 0, vfirst -> li -1. 18622EVT VT =
N->getValueType(0);
18623if (IntNo == Intrinsic::riscv_vfirst ||
18624 IntNo == Intrinsic::riscv_vfirst_mask)
18630case ISD::EXPERIMENTAL_VP_REVERSE:
18637EVT VT =
N->getValueType(0);
18648for (
unsigned i = 0; i < NF; ++i)
18653// If this is a bitcast between a MVT::v4i1/v2i1/v1i1 and an illegal integer 18654// type, widen both sides to avoid a trip through memory. 18655if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) &&
18678EVT XVT,
unsigned KeptBits)
const{
18679// For vectors, we don't have a preference.. 18683if (XVT != MVT::i32 && XVT != MVT::i64)
18686// We can use sext.w for RV64 or an srai 31 on RV32. 18687if (KeptBits == 32 || KeptBits == 64)
18690// With Zbb we can use sext.h/sext.b. 18691return Subtarget.hasStdExtZbb() &&
18692 ((KeptBits == 8 && XVT == MVT::i64 && !Subtarget.
is64Bit()) ||
18700"Expected shift op");
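// Illustrative sketch (assumption): the folds listed above rely on a left
// shift distributing over add/or with a constant, i.e.
//   (x + c1) << c2 == (x << c2) + (c1 << c2)   (modulo 2^64)
//   (x | c1) << c2 == (x << c2) | (c1 << c2)
// The helper name is hypothetical.
static bool shlFoldHolds(uint64_t X, uint64_t C1, unsigned C2) {
  C2 &= 63; // keep the shift amount in range
  bool AddOk = ((X + C1) << C2) == ((X << C2) + (C1 << C2));
  bool OrOk = ((X | C1) << C2) == ((X << C2) | (C1 << C2));
  return AddOk && OrOk;
}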
18702// The following folds are only desirable if `(OP _, c1 << c2)` can be 18703// materialised in fewer instructions than `(OP _, c1)`: 18705// (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2) 18706// (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2) 18710// LD/ST will optimize constant Offset extraction, so when AddNode is used by 18711// LD/ST, it can still complete the folding optimization operation performed 18715// This use is the one we're on right now. Skip it 18718if (!isa<StoreSDNode>(
Use) && !isa<LoadSDNode>(
Use))
18727return isUsedByLdSt(N0.
getNode(),
N);
18729auto *C1 = dyn_cast<ConstantSDNode>(N0->
getOperand(1));
18730auto *C2 = dyn_cast<ConstantSDNode>(
N->getOperand(1));
18732// Bail if we might break a sh{1,2,3}add pattern. 18733if (Subtarget.hasStdExtZba() && C2 && C2->getZExtValue() >= 1 &&
18734 C2->getZExtValue() <= 3 &&
N->hasOneUse() &&
18735N->user_begin()->getOpcode() ==
ISD::ADD &&
18736 !isUsedByLdSt(*
N->user_begin(),
nullptr) &&
18737 !isa<ConstantSDNode>(
N->user_begin()->getOperand(1)))
18741constAPInt &C1Int = C1->getAPIntValue();
18742APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
18744// We can materialise `c1 << c2` into an add immediate, so it's "free", 18745// and the combine should happen, to potentially allow further combines 18751// We can materialise `c1` in an add immediate, so it's "free", and the 18752// combine should be prevented. 18757// Neither constant will fit into an immediate, so find materialisation 18761/*CompressionCost*/true);
18764/*CompressionCost*/true);
18766// Materialising `c1` is cheaper than materialising `c1 << c2`, so the 18767// combine should be prevented. 18768if (C1Cost < ShiftedC1Cost)
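// Illustrative sketch (not part of the combine itself): the algebraic identity
// behind commuting the shift, and the cost question it raises. The identity
// always holds; the combine is only worthwhile when `c1 << c2` is no more
// expensive to materialise than `c1`. Helper names are hypothetical.
#if 0
#include <cassert>
#include <cstdint>

static bool fitsSImm12(int64_t V) { return V >= -2048 && V <= 2047; }

static void commuteShlAddSketch() {
  uint64_t X = 5, C1 = 0x70, C2 = 4;
  // (shl (add x, c1), c2) == (add (shl x, c2), c1 << c2)
  assert(((X + C1) << C2) == ((X << C2) + (C1 << C2)));
  // Here c1 << c2 = 0x700 still fits in a 12-bit signed immediate, so the
  // commuted form needs no extra constant materialisation.
  assert(fitsSImm12(int64_t(C1 << C2)));
}
#endif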
  // Delay this optimization as late as possible.
  EVT VT = Op.getValueType();
  unsigned Opcode = Op.getOpcode();
  const APInt &Mask = C->getAPIntValue();

  // Clear all non-demanded bits initially.

  // Try to make a smaller immediate by setting undemanded bits.
  auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {
    return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask);
  };
  auto UseMask = [Mask, Op, &TLO](const APInt &NewMask) -> bool {
    if (NewMask == Mask)
                      Op.getOperand(0), NewC);
  };

  // If the shrunk mask fits in sign extended 12 bits, let the target
  // independent code apply it.

  // And has a few special cases for zext.

  // Preserve (and X, 0xffff), if zext.h exists use zext.h,
  // otherwise use SLLI + SRLI.
  APInt NewMask = APInt(Mask.getBitWidth(), 0xffff);
  if (IsLegalMask(NewMask))
    return UseMask(NewMask);

  // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
  if (VT == MVT::i64) {
    if (IsLegalMask(NewMask))
      return UseMask(NewMask);
  }

  // For the remaining optimizations, we need to be able to make a negative
  // number through a combination of mask and undemanded bits.

  // What is the fewest number of bits we need to represent the negative number.

  // Try to make a 12 bit negative immediate. If that fails try to make a 32
  // bit negative immediate unless the shrunk immediate already fits in 32 bits.
  // If we can't create a simm12, we shouldn't change opaque constants.
  APInt NewMask = ShrunkMask;
  if (MinSignedBits <= 12)
  else if (!C->isOpaque() && MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))

  // Check that our new mask is a subset of the demanded mask.
  assert(IsLegalMask(NewMask));
  return UseMask(NewMask);
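// Illustrative sketch (not part of the file's logic): the legality rule used
// above, restated with plain 64-bit integers. A replacement mask is acceptable
// only if it keeps every bit of the shrunk (demanded) mask and adds nothing
// outside the expanded mask, i.e. ShrunkMask is a subset of NewMask and
// NewMask is a subset of ExpandedMask. Helper names are hypothetical.
#if 0
#include <cassert>
#include <cstdint>

static bool isLegalMaskSketch(uint64_t ShrunkMask, uint64_t ExpandedMask,
                              uint64_t NewMask) {
  bool KeepsDemandedOnes = (ShrunkMask & ~NewMask) == 0;
  bool AddsOnlyUndemanded = (NewMask & ~ExpandedMask) == 0;
  return KeepsDemandedOnes && AddsOnlyUndemanded;
}

static void shrinkDemandedConstantSketch() {
  // Only the low 16 bits are demanded and the constant is 0x1234ffff.
  uint64_t Demanded = 0xffff, C = 0x1234ffffULL;
  uint64_t Shrunk = C & Demanded;    // 0xffff: non-demanded bits cleared.
  uint64_t Expanded = C | ~Demanded; // demanded bits of C plus all free bits.
  // 0xffff is a legal replacement, and it is cheap (zext.h or SLLI+SRLI).
  assert(isLegalMaskSketch(Shrunk, Expanded, 0xffff));
}
#endif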
  static const uint64_t GREVMasks[] = {
      0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
      0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};

  for (unsigned Stage = 0; Stage != 6; ++Stage) {
    unsigned Shift = 1 << Stage;
    if (ShAmt & Shift) {
      uint64_t Mask = GREVMasks[Stage];
      uint64_t Res = ((x & Mask) << Shift) | ((x >> Shift) & Mask);
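// Illustrative sketch (not part of the lowering): a standalone version of the
// staged bit-permutation above. With a control value of 7 (stages 1, 2 and 4
// active) it performs brev8, i.e. reverses the bits within each byte. The
// function name is hypothetical.
#if 0
#include <cassert>
#include <cstdint>

static uint64_t grevSketch(uint64_t X, unsigned ShAmt) {
  static const uint64_t Masks[] = {
      0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
      0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
  for (unsigned Stage = 0; Stage != 6; ++Stage) {
    unsigned Shift = 1u << Stage;
    if (ShAmt & Shift) {
      uint64_t Mask = Masks[Stage];
      X = ((X & Mask) << Shift) | ((X >> Shift) & Mask);
    }
  }
  return X;
}

static void brev8Sketch() {
  // 0b00000001 reversed within its byte is 0b10000000.
  assert(grevSketch(0x01, 7) == 0x80);
}
#endif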
                                                   const APInt &DemandedElts,
                                                   unsigned Depth) const {
  unsigned Opc = Op.getOpcode();
         "Should use MaskedValueIsZero if you don't know whether Op"
         " is a target node!");

    // If we don't know any bits, early out.

    // Only known if known in both the LHS and RHS.

    // Result is either all zero or operand 0. We can propagate zeros, but not
    // ones.

    // We only care about the lower 32 bits.
    // Restore the original width by sign extending.

    // We only care about the lower 32 bits.
    // Restore the original width by sign extending.

    // Restore the original width by sign extending.

    // FIXME: This is based on the non-ratified Zbp GREV and GORC where a
    // control value of 7 is equivalent to brev8 and orc.b.

    // To compute zeros, we need to invert the value and invert it back after.

    // We can use the minimum and maximum VLEN values to bound VLENB. We
    // know VLEN must be a power of two.
    assert(MinVLenB > 0 && "READ_VLENB without vector extension enabled?");
    if (MaxVLenB == MinVLenB)

    // fclass will only set one of the low 10 bits.

    // We can't do anything for most intrinsics.
    case Intrinsic::riscv_vsetvli:
    case Intrinsic::riscv_vsetvlimax: {
      bool HasAVL = IntNo == Intrinsic::riscv_vsetvli;
      unsigned VSEW = Op.getConstantOperandVal(HasAVL + 1);
      MaxVL = (Fractional) ? MaxVL / LMul : MaxVL * LMul;

      // Result of vsetvli must be not larger than AVL.
      if (HasAVL && isa<ConstantSDNode>(Op.getOperand(1)))
        MaxVL = std::min(MaxVL, Op.getConstantOperandVal(1));

      unsigned KnownZeroFirstBit = Log2_32(MaxVL) + 1;
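// Illustrative sketch (not part of the lowering): how an upper bound on the
// result of vsetvli yields known zero bits. VLMAX = (VLEN / SEW) * LMUL, and
// every bit from position Log2(MaxVL) + 1 upward must be zero. Helper names
// are hypothetical.
#if 0
#include <cassert>
#include <cstdint>

static unsigned log2Floor(uint64_t V) {
  unsigned L = 0;
  while (V >>= 1)
    ++L;
  return L;
}

static void vsetvliKnownBitsSketch() {
  uint64_t VLEN = 128, SEW = 32, LMUL = 2;
  uint64_t MaxVL = (VLEN / SEW) * LMUL;              // VLMAX = 8 here.
  unsigned KnownZeroFirstBit = log2Floor(MaxVL) + 1; // bits [4, XLEN) are zero
  assert(MaxVL == 8 && KnownZeroFirstBit == 4);
}
#endif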
                                                        unsigned Depth) const {
  switch (Op.getOpcode()) {
    if (Tmp == 1)
      return 1; // Early out.
    return std::min(Tmp, Tmp2);

    // Output is either all zero or operand 0. We can propagate sign bit count

    // We expand this at isel to negw+max. The result will have 33 sign bits
    // if the input has at least 33 sign bits.
    if (Tmp < 33)
      return 1;

    // TODO: As the result is sign-extended, this is conservatively correct. A
    // more precise answer could be calculated for SRAW depending on known
    // bits in the shift amount.

    // The number of sign bits of the scalar result is computed by obtaining
    // the element type of the input vector operand, subtracting its width
    // from the XLEN, and then adding one (sign bit within the element type).
    // If the element type is wider than XLen, the least-significant XLEN bits
    // are taken.
    unsigned XLen = Subtarget.getXLen();
    unsigned EltBits = Op.getOperand(0).getScalarValueSizeInBits();
    if (EltBits <= XLen)
      return XLen - EltBits + 1;

    unsigned IntNo = Op.getConstantOperandVal(1);
    case Intrinsic::riscv_masked_atomicrmw_xchg_i64:
    case Intrinsic::riscv_masked_atomicrmw_add_i64:
    case Intrinsic::riscv_masked_atomicrmw_sub_i64:
    case Intrinsic::riscv_masked_atomicrmw_nand_i64:
    case Intrinsic::riscv_masked_atomicrmw_max_i64:
    case Intrinsic::riscv_masked_atomicrmw_min_i64:
    case Intrinsic::riscv_masked_atomicrmw_umax_i64:
    case Intrinsic::riscv_masked_atomicrmw_umin_i64:
    case Intrinsic::riscv_masked_cmpxchg_i64:
      // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
      // narrow atomic operation. These are implemented using atomic
      // operations at the minimum supported atomicrmw/cmpxchg width whose
      // result is then sign extended to XLEN. With +A, the minimum width is
      // 32 for both 64 and 32.
      assert(Subtarget.hasStdExtA());
  // TODO: Add more target nodes.
  switch (Op.getOpcode()) {
    // Integer select_cc cannot create poison.
    // TODO: What are the FP poison semantics?
    // TODO: This instruction blocks poison from the unselected operand, can
    // we do anything with that?
    return !Op.getValueType().isInteger();

  assert(Ld && "Unexpected null LoadSDNode");

  // Only constant pools with no offset are supported.
    auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
    if (!CNode || CNode->isMachineConstantPoolEntry() ||
        CNode->getOffset() != 0)

  // Simple case, LLA.
    auto *CNode = GetSupportedConstantPool(Ptr);
    if (!CNode || CNode->getTargetFlags() != 0)

    return CNode->getConstVal();

  // Look for a HI and ADD_LO pair.
    auto *CNodeLo = GetSupportedConstantPool(Ptr.getOperand(1));
    auto *CNodeHi = GetSupportedConstantPool(Ptr.getOperand(0).getOperand(0));

    if (CNodeLo->getConstVal() != CNodeHi->getConstVal())

    return CNodeLo->getConstVal();
  assert(MI.getOpcode() == RISCV::ReadCounterWide && "Unexpected instruction");

  // To read a 64-bit counter CSR on a 32-bit target, we read the two halves.
  // Should the count have wrapped while it was being read, we need to try
  // again.
  //
  //   csrrs x3, counterh # load high word of counter
  //   csrrs x2, counter  # load low word of counter
  //   csrrs x4, counterh # load high word of counter
  //   bne x3, x4, read   # check if high word reads match, otherwise try again

  // Transfer the remainder of BB and its successor edges to DoneMBB.

  Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);

  int64_t LoCounter = MI.getOperand(2).getImm();
  int64_t HiCounter = MI.getOperand(3).getImm();

  BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)

  MI.eraseFromParent();
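// Illustrative sketch (not part of the emitted MIR): the retry loop above,
// written as plain C++. readCounterLo/readCounterHi stand in for the csrrs
// reads and are hypothetical.
#if 0
#include <cstdint>

extern uint32_t readCounterLo(); // hypothetical: csrrs of the low CSR
extern uint32_t readCounterHi(); // hypothetical: csrrs of the high CSR

static uint64_t readCounter64On32BitTarget() {
  uint32_t Hi, Lo, HiAgain;
  do {
    Hi = readCounterHi();
    Lo = readCounterLo();
    HiAgain = readCounterHi();
    // If the high half changed between the two reads, the low half wrapped
    // while we were reading; try again.
  } while (Hi != HiAgain);
  return (uint64_t(Hi) << 32) | Lo;
}
#endif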
  assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");

  MI.eraseFromParent(); // The pseudo instruction is gone now.

  assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
         "Unexpected instruction");

  MI.eraseFromParent(); // The pseudo instruction is gone now.

  switch (MI.getOpcode()) {
  case RISCV::Select_GPR_Using_CC_GPR:
  case RISCV::Select_GPR_Using_CC_Imm:
  case RISCV::Select_FPR16_Using_CC_GPR:
  case RISCV::Select_FPR16INX_Using_CC_GPR:
  case RISCV::Select_FPR32_Using_CC_GPR:
  case RISCV::Select_FPR32INX_Using_CC_GPR:
  case RISCV::Select_FPR64_Using_CC_GPR:
  case RISCV::Select_FPR64INX_Using_CC_GPR:
  case RISCV::Select_FPR64IN32X_Using_CC_GPR:

                            unsigned RelOpcode, unsigned EqOpcode,

  Register SavedFFlags = MRI.createVirtualRegister(&RISCV::GPRRegClass);

  // Save the current FFLAGS.

  // Restore the FFLAGS.

  // Issue a dummy FEQ opcode to raise exception for signaling NaNs.

  // Erase the pseudoinstruction.
  MI.eraseFromParent();
  // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5)
  //
  // Without this, custom-inserter would have generated:
  //
  //   A: X = ...; Y = ...
  //   C: Z = PHI [X, A], [Y, B]
  //   E: PHI [X, C], [Z, D]
  //
  // If we lower both Select_FPRX_ in a single step, we can instead generate:
  //
  //   A: X = ...; Y = ...
  //   E: PHI [X, A], [X, C], [Y, D]

  F->insert(It, FirstMBB);
  F->insert(It, SecondMBB);
  F->insert(It, SinkMBB);

  // Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
  // Fallthrough block for ThisMBB.
  // Fallthrough block for FirstMBB.
  // This is fallthrough.
  // Insert appropriate branch.
  // Insert appropriate branch.
  // Now remove the Select_FPRX_s.
  First.eraseFromParent();

// To "insert" Select_* instructions, we actually have to insert the triangle
// control-flow pattern. The incoming instructions know the destination vreg
// to set, the condition code register to branch on, the true/false values to
// select between, and the condcode to use to select the appropriate branch.
//
// We produce the following control flow:
//
// When we find a sequence of selects we attempt to optimize their emission
// by sharing the control flow. Currently we only handle cases where we have
// multiple selects with the exact same condition (same LHS, RHS and CC).
// The selects may be interleaved with other instructions if the other
// instructions meet some requirements we deem safe:
// - They are not pseudo instructions.
// - They are debug instructions. Otherwise,
// - They do not have side-effects, do not access memory and their inputs do
//   not depend on the results of the select pseudo-instructions.
// The TrueV/FalseV operands of the selects cannot depend on the result of
// previous selects in the sequence.
// These conditions could be further relaxed. See the X86 target for a
// related approach and more information.
//
// Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
// is checked here and handled by a separate function -
// EmitLoweredCascadedSelect.

  if ((MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR &&
       MI.getOpcode() != RISCV::Select_GPR_Using_CC_Imm) &&
      Next != BB->end() && Next->getOpcode() == MI.getOpcode() &&
      Next->getOperand(5).getReg() == MI.getOperand(0).getReg() &&
      Next->getOperand(5).isKill())

  if (MI.getOperand(2).isReg())
    RHS = MI.getOperand(2).getReg();

  SelectDests.insert(MI.getOperand(0).getReg());

       SequenceMBBI != E; ++SequenceMBBI) {
    if (SequenceMBBI->isDebugInstr())
    if (SequenceMBBI->getOperand(1).getReg() != LHS ||
        !SequenceMBBI->getOperand(2).isReg() ||
        SequenceMBBI->getOperand(2).getReg() != RHS ||
        SequenceMBBI->getOperand(3).getImm() != CC ||
        SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
        SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
    LastSelectPseudo = &*SequenceMBBI;
    SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
    if (SequenceMBBI->hasUnmodeledSideEffects() ||
        SequenceMBBI->mayLoadOrStore() ||
        SequenceMBBI->usesCustomInsertionHook())
      return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
  F->insert(I, IfFalseMBB);
  F->insert(I, TailMBB);

  // Set the call frame size on entry to the new basic blocks.
  unsigned CallFrameSize = TII.getCallFrameSizeAt(*LastSelectPseudo);

  // Transfer debug instructions associated with the selects to TailMBB.
    TailMBB->push_back(DebugInstr->removeFromParent());

  // Move all instructions after the sequence to TailMBB.
  TailMBB->splice(TailMBB->end(), HeadMBB,

  // Update machine-CFG edges by transferring all successors of the current
  // block to the new block which will contain the Phi nodes for the selects.

  // Set the successors for HeadMBB.

  // Insert appropriate branch.
  if (MI.getOperand(2).isImm())
        .addImm(MI.getOperand(2).getImm())

  // IfFalseMBB just falls through to TailMBB.

  // Create PHIs for all of the select pseudo-instructions.
  auto SelectMBBI = MI.getIterator();
  auto SelectEnd = std::next(LastSelectPseudo->getIterator());
  while (SelectMBBI != SelectEnd) {
    auto Next = std::next(SelectMBBI);
    // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
            TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
        .addReg(SelectMBBI->getOperand(4).getReg())
        .addReg(SelectMBBI->getOperand(5).getReg())
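// Illustrative sketch (not part of the lowering): several selects that test
// the same condition are emitted with a single shared branch, analogous to
// rewriting independent ternaries over one predicate as a single if/else.
#if 0
#include <cassert>

static void sharedConditionSelectSketch(bool C) {
  int X1 = 1, Y1 = 2, X2 = 3, Y2 = 4;
  // Two selects on the same condition...
  int A = C ? X1 : Y1;
  int B = C ? X2 : Y2;
  // ...behave exactly like one branch that assigns both results.
  int A2, B2;
  if (C) {
    A2 = X1;
    B2 = X2;
  } else {
    A2 = Y1;
    B2 = Y2;
  }
  assert(A == A2 && B == B2);
}
#endif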
// Helper to find Masked Pseudo instruction from MC instruction, LMUL and SEW.
      RISCVVInversePseudosTable::getBaseInfo(MCOpcode, LMul, SEW);
  assert(Inverse && "Unexpected LMUL and SEW pair for instruction");
      RISCV::lookupMaskedIntrinsicByUnmasked(Inverse->Pseudo);
  assert(Masked && "Could not find masked instruction for LMUL and SEW pair");

  Register SavedFFLAGS = MRI.createVirtualRegister(&RISCV::GPRRegClass);

  // Save the old value of FFLAGS.
      .add(MI.getOperand(1))
      .add(MI.getOperand(2))
      .add(MI.getOperand(3))
      .add(MI.getOperand(4))
      .add(MI.getOperand(5))
      .add(MI.getOperand(6))

  // There is no E8 variant for VFCVT_F_X.
      .add(MI.getOperand(0))
      .add(MI.getOperand(1))
      .add(MI.getOperand(3))
      .add(MI.getOperand(4))
      .add(MI.getOperand(5))
      .add(MI.getOperand(6))

  // Erase the pseudoinstruction.
  MI.eraseFromParent();
  unsigned CmpOpc, F2IOpc, I2FOpc, FSGNJOpc, FSGNJXOpc;
  switch (MI.getOpcode()) {
  case RISCV::PseudoFROUND_H:
    CmpOpc = RISCV::FLT_H;
    F2IOpc = RISCV::FCVT_W_H;
    I2FOpc = RISCV::FCVT_H_W;
    FSGNJOpc = RISCV::FSGNJ_H;
    FSGNJXOpc = RISCV::FSGNJX_H;
    RC = &RISCV::FPR16RegClass;
  case RISCV::PseudoFROUND_H_INX:
    CmpOpc = RISCV::FLT_H_INX;
    F2IOpc = RISCV::FCVT_W_H_INX;
    I2FOpc = RISCV::FCVT_H_W_INX;
    FSGNJOpc = RISCV::FSGNJ_H_INX;
    FSGNJXOpc = RISCV::FSGNJX_H_INX;
    RC = &RISCV::GPRF16RegClass;
  case RISCV::PseudoFROUND_S:
    CmpOpc = RISCV::FLT_S;
    F2IOpc = RISCV::FCVT_W_S;
    I2FOpc = RISCV::FCVT_S_W;
    FSGNJOpc = RISCV::FSGNJ_S;
    FSGNJXOpc = RISCV::FSGNJX_S;
    RC = &RISCV::FPR32RegClass;
  case RISCV::PseudoFROUND_S_INX:
    CmpOpc = RISCV::FLT_S_INX;
    F2IOpc = RISCV::FCVT_W_S_INX;
    I2FOpc = RISCV::FCVT_S_W_INX;
    FSGNJOpc = RISCV::FSGNJ_S_INX;
    FSGNJXOpc = RISCV::FSGNJX_S_INX;
    RC = &RISCV::GPRF32RegClass;
  case RISCV::PseudoFROUND_D:
    CmpOpc = RISCV::FLT_D;
    F2IOpc = RISCV::FCVT_L_D;
    I2FOpc = RISCV::FCVT_D_L;
    FSGNJOpc = RISCV::FSGNJ_D;
    FSGNJXOpc = RISCV::FSGNJX_D;
    RC = &RISCV::FPR64RegClass;
  case RISCV::PseudoFROUND_D_INX:
    CmpOpc = RISCV::FLT_D_INX;
    F2IOpc = RISCV::FCVT_L_D_INX;
    I2FOpc = RISCV::FCVT_D_L_INX;
    FSGNJOpc = RISCV::FSGNJ_D_INX;
    FSGNJXOpc = RISCV::FSGNJX_D_INX;
    RC = &RISCV::GPRRegClass;

  F->insert(I, CvtMBB);
  F->insert(I, DoneMBB);

  // Move all instructions after the sequence to DoneMBB.

  // Update machine-CFG edges by transferring all successors of the current
  // block to the new block which will contain the Phi nodes for the selects.

  // Set the successors for MBB.

  int64_t FRM = MI.getOperand(3).getImm();

  // Compare the FP value to the max value.
  Register CmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);

  // Convert to integer.
  Register F2IReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);

  // Convert back to FP.

  // Restore the sign bit.

  // Merge the results.

  MI.eraseFromParent();
  switch (MI.getOpcode()) {
  case RISCV::ReadCounterWide:
           "ReadCounterWide is only to be used on riscv32");
  case RISCV::Select_GPR_Using_CC_GPR:
  case RISCV::Select_GPR_Using_CC_Imm:
  case RISCV::Select_FPR16_Using_CC_GPR:
  case RISCV::Select_FPR16INX_Using_CC_GPR:
  case RISCV::Select_FPR32_Using_CC_GPR:
  case RISCV::Select_FPR32INX_Using_CC_GPR:
  case RISCV::Select_FPR64_Using_CC_GPR:
  case RISCV::Select_FPR64INX_Using_CC_GPR:
  case RISCV::Select_FPR64IN32X_Using_CC_GPR:
  case RISCV::BuildPairF64Pseudo:
  case RISCV::SplitF64Pseudo:
  case RISCV::PseudoQuietFLE_H:
  case RISCV::PseudoQuietFLE_H_INX:
    return emitQuietFCMP(MI, BB, RISCV::FLE_H_INX, RISCV::FEQ_H_INX, Subtarget);
  case RISCV::PseudoQuietFLT_H:
  case RISCV::PseudoQuietFLT_H_INX:
    return emitQuietFCMP(MI, BB, RISCV::FLT_H_INX, RISCV::FEQ_H_INX, Subtarget);
  case RISCV::PseudoQuietFLE_S:
  case RISCV::PseudoQuietFLE_S_INX:
    return emitQuietFCMP(MI, BB, RISCV::FLE_S_INX, RISCV::FEQ_S_INX, Subtarget);
  case RISCV::PseudoQuietFLT_S:
  case RISCV::PseudoQuietFLT_S_INX:
    return emitQuietFCMP(MI, BB, RISCV::FLT_S_INX, RISCV::FEQ_S_INX, Subtarget);
  case RISCV::PseudoQuietFLE_D:
  case RISCV::PseudoQuietFLE_D_INX:
    return emitQuietFCMP(MI, BB, RISCV::FLE_D_INX, RISCV::FEQ_D_INX, Subtarget);
  case RISCV::PseudoQuietFLE_D_IN32X:
  case RISCV::PseudoQuietFLT_D:
  case RISCV::PseudoQuietFLT_D_INX:
    return emitQuietFCMP(MI, BB, RISCV::FLT_D_INX, RISCV::FEQ_D_INX, Subtarget);
  case RISCV::PseudoQuietFLT_D_IN32X:
  case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:
  case RISCV::PseudoVFROUND_NOEXCEPT_V_M2_MASK:
  case RISCV::PseudoVFROUND_NOEXCEPT_V_M4_MASK:
  case RISCV::PseudoVFROUND_NOEXCEPT_V_M8_MASK:
  case RISCV::PseudoVFROUND_NOEXCEPT_V_MF2_MASK:
  case RISCV::PseudoVFROUND_NOEXCEPT_V_MF4_MASK:
  case RISCV::PseudoFROUND_H:
  case RISCV::PseudoFROUND_H_INX:
  case RISCV::PseudoFROUND_S:
  case RISCV::PseudoFROUND_S_INX:
  case RISCV::PseudoFROUND_D:
  case RISCV::PseudoFROUND_D_INX:
  case RISCV::PseudoFROUND_D_IN32X:
  case RISCV::PROBED_STACKALLOC_DYN:
  case TargetOpcode::STATEPOINT:
    // STATEPOINT is a pseudo instruction which has no implicit defs/uses
    // while jal call instruction (where statepoint will be lowered at the end)
    // has implicit def. This def is early-clobber as it will be set at
    // the moment of the call and earlier than any use is read.
    // Add this implicit dead def here as a workaround.
    MI.addOperand(*MI.getMF(),
                  MachineOperand::CreateReg(
                      RISCV::X1, /*isDef*/ true,
                      /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
                      /*isUndef*/ false, /*isEarlyClobber*/ true));
  case TargetOpcode::STACKMAP:
  case TargetOpcode::PATCHPOINT:
             "supported on 64-bit targets");

  // Add FRM dependency to any instructions with dynamic rounding mode.

  // Vector pseudos have FRM index indicated by TSFlags.

  // If the instruction already reads FRM, don't add another read.
  if (MI.readsRegister(RISCV::FRM, /*TRI=*/nullptr))
void RISCVTargetLowering::analyzeInputArgs(
  unsigned NumArgs = Ins.size();
  for (unsigned i = 0; i != NumArgs; ++i) {
    MVT ArgVT = Ins[i].VT;
    Type *ArgTy = nullptr;
    else if (Ins[i].isOrigArg())
      ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
                 /*IsFixed=*/true, IsRet, ArgTy)) {

void RISCVTargetLowering::analyzeOutputArgs(
  unsigned NumArgs = Outs.size();
  for (unsigned i = 0; i != NumArgs; i++) {
    MVT ArgVT = Outs[i].VT;
    Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
                 Outs[i].IsFixed, IsRet, OrigTy)) {
// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
// values.

  // The caller is responsible for loading the full value if the argument is
  // passed with CCValAssign::Indirect.

  // If input is sign extended from 32 bits, note it for the SExtWRemoval pass.
  if (In.isOrigArg()) {
    // An input zero extended from i31 can also be considered sign extended.
    if ((BitWidth <= 32 && In.Flags.isSExt()) ||
        (BitWidth < 32 && In.Flags.isZExt())) {

  if (LocVT == MVT::i64 && VA.getValVT() == MVT::f32)

  // The caller is responsible for loading the full value if the argument is
  // passed with CCValAssign::Indirect.

  // When the value is a scalable vector, we save the pointer which points to
  // the scalable vector value in the stack. The ValVT will be the pointer
  // type, instead of the scalable vector type.
                                 /*IsImmutable=*/true);
      ExtType, DL, LocVT, Chain, FIN,

  // Second half of f64 is passed on the stack.
                                 /*IsImmutable=*/true);
  // Second half of f64 is passed in another GPR.

// Transform physical registers into virtual registers.

  if (Subtarget.hasStdExtE())
                       "(Zdinx/D) instruction set extensions");

  if (Func.hasFnAttribute("interrupt")) {
    if (!Func.arg_empty())
          "Functions with the interrupt attribute cannot have arguments!");

    if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
          "Function interrupt attribute argument not supported!");

  unsigned XLenInBytes = Subtarget.getXLen() / 8;
  // Used with varargs to accumulate store chains.
  std::vector<SDValue> OutChains;

  // Assign locations to all of the incoming arguments.
  analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false,

  for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
    // Passing f64 on RV32D with a soft float ABI must be handled as a special
    // case.

    // If the original argument was split and passed by reference (e.g. i128
    // on RV32), we need to load all parts of it here (using the same
    // address). Vectors may be partly split to registers and partly to the
    // stack, in which case the base address is partly offset and subsequent
    // stores are relative to that.
    unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
    unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
    while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
      unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;

  // Size of the vararg save area. For now, the varargs save area is either
  // zero or large enough to hold a0-a7.
  int VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);

  // If all registers are allocated, then all varargs must be passed on the
  // stack and we don't need to save any argregs.
  if (VarArgsSaveSize == 0) {

  int VaArgOffset = -VarArgsSaveSize;

  // If saving an odd number of registers then create an extra stack slot to
  // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
  // offsets to even-numbered registers remain 2*XLEN-aligned.
                     XLenInBytes, VaArgOffset - static_cast<int>(XLenInBytes),
                     true);
  VarArgsSaveSize += XLenInBytes;

  // Copy the integer registers that may have been used for passing varargs
  // to the vararg save area.
  for (unsigned I = Idx; I < ArgRegs.size(); ++I) {
        Chain, DL, ArgValue, FIN,
    OutChains.push_back(Store);

  // Record the frame index of the first variable argument
  // which is a value necessary to VASTART.

  // All stores are grouped in one node to allow the matching between
  // the size of Ins and InVals. This only happens for vararg functions.
  if (!OutChains.empty()) {
    OutChains.push_back(Chain);
20321auto CalleeCC = CLI.CallConv;
20322auto &Outs = CLI.Outs;
20324auto CallerCC = Caller.getCallingConv();
20326// Exception-handling functions need a special set of instructions to 20327// indicate a return to the hardware. Tail-calling another function would 20328// probably break this. 20329// TODO: The "interrupt" attribute isn't currently defined by RISC-V. This 20330// should be expanded as new function attributes are introduced. 20331if (Caller.hasFnAttribute(
"interrupt"))
20334// Do not tail call opt if the stack is used to pass parameters. 20338// Do not tail call opt if any parameters need to be passed indirectly. 20339// Since long doubles (fp128) and i128 are larger than 2*XLEN, they are 20340// passed indirectly. So the address of the value will be passed in a 20341// register, or if not available, then the address is put on the stack. In 20342// order to pass indirectly, space on the stack often needs to be allocated 20343// in order to store the value. In this case the CCInfo.getNextStackOffset() 20344// != 0 check is not enough and we need to check if any CCValAssign ArgsLocs 20345// are passed CCValAssign::Indirect. 20346for (
auto &VA : ArgLocs)
20350// Do not tail call opt if either caller or callee uses struct return 20352auto IsCallerStructRet = Caller.hasStructRetAttr();
20353auto IsCalleeStructRet = Outs.
empty() ?
false : Outs[0].Flags.isSRet();
20354if (IsCallerStructRet || IsCalleeStructRet)
20357// The callee has to preserve all registers the caller needs to preserve. 20359constuint32_t *CallerPreserved =
TRI->getCallPreservedMask(MF, CallerCC);
20360if (CalleeCC != CallerCC) {
20361constuint32_t *CalleePreserved =
TRI->getCallPreservedMask(MF, CalleeCC);
20362if (!
TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
20366// Byval parameters hand the function a pointer directly into the stack area 20367// we want to reuse during a tail call. Working around this *is* possible 20368// but less efficient and uglier in LowerCall. 20369for (
auto &Arg : Outs)
20370if (Arg.Flags.isByVal())
// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
// and output parameter nodes.

  // Analyze the operands of the call, assigning locations to each operand.
  if (Subtarget.hasStdExtE())
  analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI,

  // Check if it's really possible to do a tail call.
    IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
               "site marked musttail");

  // Get a count of how many bytes are to be pushed on the stack.

  // Create local copies for byval args
  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    if (!Flags.isByVal())

    unsigned Size = Flags.getByValSize();
    Align Alignment = Flags.getNonZeroByValAlign();
    Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
                          /*IsVolatile=*/false,
                          /*AlwaysInline=*/false, /*CI*/ nullptr, IsTailCall,

  // Copy argument values to their designated locations.
  for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
    SDValue ArgValue = OutVals[OutIdx];

    // Handle passing f64 on RV32D with a soft float ABI as a special case.

      // Get the CCValAssign for the Hi part.

        // Second half of f64 is passed on the stack.
        if (!StackPtr.getNode())

      // Second half of f64 is passed in another GPR.
      RegsToPass.push_back(std::make_pair(RegHigh, Hi));

    // Promote the value if needed.
    // For now, only handle fully promoted and indirect arguments.
      // Store the argument in a stack slot and pass its address.

      // If the original argument was split (e.g. i128), we need
      // to store the required parts of it here (and pass just one address).
      // Vectors may be partly split to registers and partly to the stack, in
      // which case the base address is partly offset and subsequent stores are
      // relative to that.
      unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
      unsigned ArgPartOffset = Outs[OutIdx].PartOffset;

      // Calculate the total size to store. We don't have access to what we're
      // actually storing other than performing the loop and collecting the
      while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
        SDValue PartValue = OutVals[OutIdx + 1];
        unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;

      int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
      for (const auto &Part : Parts) {
        SDValue PartValue = Part.first;
        SDValue PartOffset = Part.second;
      ArgValue = SpillSlot;

    // Use local copy if it is a byval arg.
    if (Flags.isByVal())
      ArgValue = ByValArgs[j++];

    // Queue up the argument copies and emit them at the end.
      assert(!IsTailCall && "Tail call not allowed if stack is used "
                            "for passing parameters");

      // Work out the address of the stack slot.
      if (!StackPtr.getNode())
  // Join the stores, which are independent of one another.
  if (!MemOpChains.empty())

  // Build a sequence of copy-to-reg nodes, chained and glued together.
  for (auto &Reg : RegsToPass) {
    Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
  }

  // Validate that none of the argument registers have been marked as
  // reserved, if so report an error. Do the same for the return address if this
  // is not a tailcall.
  validateCCReservedRegs(RegsToPass, MF);
         "Return address register required, but has been reserved."});

  // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
  // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
  // split it and then direct call can be matched by PseudoCALL.
  bool CalleeIsLargeExternalSymbol = false;
  if (auto *S = dyn_cast<GlobalAddressSDNode>(Callee))
  else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    CalleeIsLargeExternalSymbol = true;
  }

  // The first call operand is the chain and the second is the target address.

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (auto &Reg : RegsToPass)

  // Add a register mask operand representing the call-preserved registers.
  const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
  assert(Mask && "Missing call preserved mask for calling convention");

  // Glue the call to the argument copies, if any.
           "Unexpected CFI type for a direct call");

  // Use software guarded branch for large code model non-indirect calls
  // Tail call to external symbol will have a null CLI.CB and we need another
  // way to determine the callsite type
  bool NeedSWGuarded = false;
      Subtarget.hasStdExtZicfilp() &&
    NeedSWGuarded = true;

  Chain = DAG.getNode(CallOpc, DL, NodeTys, Ops);

  // Mark the end of the call, which is glued to the call itself.

  // Assign locations to each value returned by this call.
  analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_RISCV);
  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
    auto &VA = RVLocs[i];
    // Copy the value out

    // Glue the RetValue to the end of the call sequence

    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      assert(VA.needsCustom());

  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);

  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    MVT VT = Outs[i].VT;
                 /*IsFixed=*/true, /*IsRet=*/true, nullptr))

  // Stores the assignment of the return value to a location.

  // Info about the registers and stack slot.

  // Copy the result values into the output registers.
  for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
    SDValue Val = OutVals[OutIdx];

    // Handle returning f64 on RV32D with a soft float ABI.
                                DAG.getVTList(MVT::i32, MVT::i32), Val);
      Register RegHi = RVLocs[++i].getLocReg();
             "Return value register required, but has been reserved."});

    // Handle a 'normal' return.
             "Return value register required, but has been reserved."});

  // Guarantee that all emitted copies are stuck together.

  RetOps[0] = Chain; // Update chain.
  // Add the glue node if we have it.

  // Interrupt service routines use different return instructions.
  if (Func.hasFnAttribute("interrupt")) {
    if (!Func.getReturnType()->isVoidTy())
          "Functions with the interrupt attribute must have void return type!");

    if (Kind == "supervisor")

    return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
  }

void RISCVTargetLowering::validateCCReservedRegs(
          F, "Argument register required, but has been reserved."});

// Check if the result of the node is only used as a return value, as
// otherwise we can't perform a tail-call.
  if (N->getNumValues() != 1)
  if (!N->hasNUsesOfValue(1, 0))

  SDNode *Copy = *N->user_begin();

  // TODO: Handle additional opcodes in order to support tail-calling libcalls
  // with soft float ABIs.

  // If the ISD::CopyToReg has a glue operand, we conservatively assume it
  // isn't safe to perform a tail call.
  if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == MVT::Glue)

  // The copy must be used by a RISCVISD::RET_GLUE, and nothing else.
  for (SDNode *Node : Copy->users()) {

  Chain = Copy->getOperand(0);
#define NODE_NAME_CASE(NODE)                                                   \
  case RISCVISD::NODE:                                                         \
    return "RISCVISD::" #NODE;
#undef NODE_NAME_CASE

/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'S':
      // A symbolic address

  if (Constraint == "vr" || Constraint == "vd" || Constraint == "vm")
  if (Constraint == "cr" || Constraint == "cR" || Constraint == "cf")
std::pair<unsigned, const TargetRegisterClass *>

  // First, see if this is a constraint that directly corresponds to a RISC-V
  // register class.
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
      // TODO: Support fixed vectors up to XLen for P extension?
      if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
        return std::make_pair(0U, &RISCV::GPRF16NoX0RegClass);
      if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
        return std::make_pair(0U, &RISCV::GPRF32NoX0RegClass);
      if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
        return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
      return std::make_pair(0U, &RISCV::GPRNoX0RegClass);

      if (VT == MVT::f16) {
        if (Subtarget.hasStdExtZfhmin())
          return std::make_pair(0U, &RISCV::FPR16RegClass);
        if (Subtarget.hasStdExtZhinxmin())
          return std::make_pair(0U, &RISCV::GPRF16NoX0RegClass);
      } else if (VT == MVT::f32) {
        if (Subtarget.hasStdExtF())
          return std::make_pair(0U, &RISCV::FPR32RegClass);
        if (Subtarget.hasStdExtZfinx())
          return std::make_pair(0U, &RISCV::GPRF32NoX0RegClass);
      } else if (VT == MVT::f64) {
        if (Subtarget.hasStdExtD())
          return std::make_pair(0U, &RISCV::FPR64RegClass);
        if (Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
          return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
        if (Subtarget.hasStdExtZdinx() && Subtarget.is64Bit())
          return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
      }

      return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
    }
  } else if (Constraint == "vr") {
    for (const auto *RC :
         {&RISCV::VRRegClass, &RISCV::VRM2RegClass, &RISCV::VRM4RegClass,
          &RISCV::VRM8RegClass, &RISCV::VRN2M1RegClass, &RISCV::VRN3M1RegClass,
          &RISCV::VRN4M1RegClass, &RISCV::VRN5M1RegClass,
          &RISCV::VRN6M1RegClass, &RISCV::VRN7M1RegClass,
          &RISCV::VRN8M1RegClass, &RISCV::VRN2M2RegClass,
          &RISCV::VRN3M2RegClass, &RISCV::VRN4M2RegClass,
          &RISCV::VRN2M4RegClass}) {
        return std::make_pair(0U, RC);
    }
  } else if (Constraint == "vd") {
    for (const auto *RC :
         {&RISCV::VRNoV0RegClass, &RISCV::VRM2NoV0RegClass,
          &RISCV::VRM4NoV0RegClass, &RISCV::VRM8NoV0RegClass,
          &RISCV::VRN2M1NoV0RegClass, &RISCV::VRN3M1NoV0RegClass,
          &RISCV::VRN4M1NoV0RegClass, &RISCV::VRN5M1NoV0RegClass,
          &RISCV::VRN6M1NoV0RegClass, &RISCV::VRN7M1NoV0RegClass,
          &RISCV::VRN8M1NoV0RegClass, &RISCV::VRN2M2NoV0RegClass,
          &RISCV::VRN3M2NoV0RegClass, &RISCV::VRN4M2NoV0RegClass,
          &RISCV::VRN2M4NoV0RegClass}) {
        return std::make_pair(0U, RC);
    }
  } else if (Constraint == "vm") {
    if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy))
      return std::make_pair(0U, &RISCV::VMV0RegClass);
  } else if (Constraint == "cr") {
    if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
      return std::make_pair(0U, &RISCV::GPRF16CRegClass);
    if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
      return std::make_pair(0U, &RISCV::GPRF32CRegClass);
    if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
      return std::make_pair(0U, &RISCV::GPRPairCRegClass);
    return std::make_pair(0U, &RISCV::GPRCRegClass);
  } else if (Constraint == "cR") {
    return std::make_pair(0U, &RISCV::GPRPairCRegClass);
  } else if (Constraint == "cf") {
    if (VT == MVT::f16) {
      if (Subtarget.hasStdExtZfhmin())
        return std::make_pair(0U, &RISCV::FPR16CRegClass);
      if (Subtarget.hasStdExtZhinxmin())
        return std::make_pair(0U, &RISCV::GPRF16CRegClass);
    } else if (VT == MVT::f32) {
      if (Subtarget.hasStdExtF())
        return std::make_pair(0U, &RISCV::FPR32CRegClass);
      if (Subtarget.hasStdExtZfinx())
        return std::make_pair(0U, &RISCV::GPRF32CRegClass);
    } else if (VT == MVT::f64) {
      if (Subtarget.hasStdExtD())
        return std::make_pair(0U, &RISCV::FPR64CRegClass);
      if (Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
        return std::make_pair(0U, &RISCV::GPRPairCRegClass);
      if (Subtarget.hasStdExtZdinx() && Subtarget.is64Bit())
        return std::make_pair(0U, &RISCV::GPRCRegClass);
  // Clang will correctly decode the usage of register name aliases into their
  // official names. However, other frontends like `rustc` do not. This allows
  // users of these frontends to use the ABI names for registers in LLVM-style
  // register constraints.
      .Case("{zero}", RISCV::X0)
      .Case("{ra}", RISCV::X1)
      .Case("{sp}", RISCV::X2)
      .Case("{gp}", RISCV::X3)
      .Case("{tp}", RISCV::X4)
      .Case("{t0}", RISCV::X5)
      .Case("{t1}", RISCV::X6)
      .Case("{t2}", RISCV::X7)
      .Cases("{s0}", "{fp}", RISCV::X8)
      .Case("{s1}", RISCV::X9)
      .Case("{a0}", RISCV::X10)
      .Case("{a1}", RISCV::X11)
      .Case("{a2}", RISCV::X12)
      .Case("{a3}", RISCV::X13)
      .Case("{a4}", RISCV::X14)
      .Case("{a5}", RISCV::X15)
      .Case("{a6}", RISCV::X16)
      .Case("{a7}", RISCV::X17)
      .Case("{s2}", RISCV::X18)
      .Case("{s3}", RISCV::X19)
      .Case("{s4}", RISCV::X20)
      .Case("{s5}", RISCV::X21)
      .Case("{s6}", RISCV::X22)
      .Case("{s7}", RISCV::X23)
      .Case("{s8}", RISCV::X24)
      .Case("{s9}", RISCV::X25)
      .Case("{s10}", RISCV::X26)
      .Case("{s11}", RISCV::X27)
      .Case("{t3}", RISCV::X28)
      .Case("{t4}", RISCV::X29)
      .Case("{t5}", RISCV::X30)
      .Case("{t6}", RISCV::X31)

  if (XRegFromAlias != RISCV::NoRegister)
    return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
  // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
  // TableGen record rather than the AsmName to choose registers for InlineAsm
  // constraints, plus we want to match those names to the widest floating point
  // register type available, manually select floating point registers here.
  //
  // The second case is the ABI name of the register, so that frontends can also
  // use the ABI names in register constraint lists.
  if (Subtarget.hasStdExtF()) {
        .Cases("{f0}", "{ft0}", RISCV::F0_F)
        .Cases("{f1}", "{ft1}", RISCV::F1_F)
        .Cases("{f2}", "{ft2}", RISCV::F2_F)
        .Cases("{f3}", "{ft3}", RISCV::F3_F)
        .Cases("{f4}", "{ft4}", RISCV::F4_F)
        .Cases("{f5}", "{ft5}", RISCV::F5_F)
        .Cases("{f6}", "{ft6}", RISCV::F6_F)
        .Cases("{f7}", "{ft7}", RISCV::F7_F)
        .Cases("{f8}", "{fs0}", RISCV::F8_F)
        .Cases("{f9}", "{fs1}", RISCV::F9_F)
        .Cases("{f10}", "{fa0}", RISCV::F10_F)
        .Cases("{f11}", "{fa1}", RISCV::F11_F)
        .Cases("{f12}", "{fa2}", RISCV::F12_F)
        .Cases("{f13}", "{fa3}", RISCV::F13_F)
        .Cases("{f14}", "{fa4}", RISCV::F14_F)
        .Cases("{f15}", "{fa5}", RISCV::F15_F)
        .Cases("{f16}", "{fa6}", RISCV::F16_F)
        .Cases("{f17}", "{fa7}", RISCV::F17_F)
        .Cases("{f18}", "{fs2}", RISCV::F18_F)
        .Cases("{f19}", "{fs3}", RISCV::F19_F)
        .Cases("{f20}", "{fs4}", RISCV::F20_F)
        .Cases("{f21}", "{fs5}", RISCV::F21_F)
        .Cases("{f22}", "{fs6}", RISCV::F22_F)
        .Cases("{f23}", "{fs7}", RISCV::F23_F)
        .Cases("{f24}", "{fs8}", RISCV::F24_F)
        .Cases("{f25}", "{fs9}", RISCV::F25_F)
        .Cases("{f26}", "{fs10}", RISCV::F26_F)
        .Cases("{f27}", "{fs11}", RISCV::F27_F)
        .Cases("{f28}", "{ft8}", RISCV::F28_F)
        .Cases("{f29}", "{ft9}", RISCV::F29_F)
        .Cases("{f30}", "{ft10}", RISCV::F30_F)
        .Cases("{f31}", "{ft11}", RISCV::F31_F)

    if (FReg != RISCV::NoRegister) {
      assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
      if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) {
        unsigned RegNo = FReg - RISCV::F0_F;
        unsigned DReg = RISCV::F0_D + RegNo;
        return std::make_pair(DReg, &RISCV::FPR64RegClass);
      }
      if (VT == MVT::f32 || VT == MVT::Other)
        return std::make_pair(FReg, &RISCV::FPR32RegClass);
      if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16) {
        unsigned RegNo = FReg - RISCV::F0_F;
        unsigned HReg = RISCV::F0_H + RegNo;
        return std::make_pair(HReg, &RISCV::FPR16RegClass);
      }
        .Case("{v0}", RISCV::V0)
        .Case("{v1}", RISCV::V1)
        .Case("{v2}", RISCV::V2)
        .Case("{v3}", RISCV::V3)
        .Case("{v4}", RISCV::V4)
        .Case("{v5}", RISCV::V5)
        .Case("{v6}", RISCV::V6)
        .Case("{v7}", RISCV::V7)
        .Case("{v8}", RISCV::V8)
        .Case("{v9}", RISCV::V9)
        .Case("{v10}", RISCV::V10)
        .Case("{v11}", RISCV::V11)
        .Case("{v12}", RISCV::V12)
        .Case("{v13}", RISCV::V13)
        .Case("{v14}", RISCV::V14)
        .Case("{v15}", RISCV::V15)
        .Case("{v16}", RISCV::V16)
        .Case("{v17}", RISCV::V17)
        .Case("{v18}", RISCV::V18)
        .Case("{v19}", RISCV::V19)
        .Case("{v20}", RISCV::V20)
        .Case("{v21}", RISCV::V21)
        .Case("{v22}", RISCV::V22)
        .Case("{v23}", RISCV::V23)
        .Case("{v24}", RISCV::V24)
        .Case("{v25}", RISCV::V25)
        .Case("{v26}", RISCV::V26)
        .Case("{v27}", RISCV::V27)
        .Case("{v28}", RISCV::V28)
        .Case("{v29}", RISCV::V29)
        .Case("{v30}", RISCV::V30)
        .Case("{v31}", RISCV::V31)

    if (VReg != RISCV::NoRegister) {
      if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))
        return std::make_pair(VReg, &RISCV::VMRegClass);
      if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy))
        return std::make_pair(VReg, &RISCV::VRRegClass);
      for (const auto *RC :
           {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
          VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC);
          return std::make_pair(VReg, RC);
      }
    }

  std::pair<Register, const TargetRegisterClass *> Res =

  // If we picked one of the Zfinx register classes, remap it to the GPR class.
  // FIXME: When Zfinx is supported in CodeGen this will need to take the
  // Subtarget into account.
  if (Res.second == &RISCV::GPRF16RegClass ||
      Res.second == &RISCV::GPRF32RegClass ||
      Res.second == &RISCV::GPRPairRegClass)
    return std::make_pair(Res.first, &RISCV::GPRRegClass);
  // Currently only support length 1 constraints.
  if (ConstraintCode.size() == 1) {
    switch (ConstraintCode[0]) {

  // Currently only support length 1 constraints.
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
      // Validate & create a 12-bit signed immediate operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
        if (isInt<12>(CVal))

      // Validate & create an integer zero operand.

      // Validate & create a 5-bit unsigned immediate operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
        if (isUInt<5>(CVal))
  if (Subtarget.hasStdExtZtso()) {

  if (Subtarget.hasStdExtZtso()) {

  if (Subtarget.enableTrailingSeqCstFence() && isa<StoreInst>(Inst) &&

// atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
// point operations can't be used in an lr/sc sequence without breaking the
// forward-progress guarantee.

  // Don't expand forced atomics, we want to have __sync libcalls instead.
  if (Subtarget.hasForcedAtomics())

  if (Subtarget.hasStdExtZacas() &&
      (Size >= 32 || Subtarget.hasStdExtZabha()))

  if (Size < 32 && !Subtarget.hasStdExtZabha())

      return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
      return Intrinsic::riscv_masked_atomicrmw_add_i32;
      return Intrinsic::riscv_masked_atomicrmw_sub_i32;
      return Intrinsic::riscv_masked_atomicrmw_nand_i32;
      return Intrinsic::riscv_masked_atomicrmw_max_i32;
      return Intrinsic::riscv_masked_atomicrmw_min_i32;
      return Intrinsic::riscv_masked_atomicrmw_umax_i32;
      return Intrinsic::riscv_masked_atomicrmw_umin_i32;

      return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
      return Intrinsic::riscv_masked_atomicrmw_add_i64;
      return Intrinsic::riscv_masked_atomicrmw_sub_i64;
      return Intrinsic::riscv_masked_atomicrmw_nand_i64;
      return Intrinsic::riscv_masked_atomicrmw_max_i64;
      return Intrinsic::riscv_masked_atomicrmw_min_i64;
      return Intrinsic::riscv_masked_atomicrmw_umax_i64;
      return Intrinsic::riscv_masked_atomicrmw_umin_i64;

  // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
  // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
  // mask, as this produces better code than the LR/SC loop emitted by
  // int_riscv_masked_atomicrmw_xchg.

  unsigned XLen = Subtarget.getXLen();

  // Must pass the shift amount needed to sign extend the loaded value prior
  // to performing a signed comparison for min/max. ShiftAmt is the number of
  // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
  // is the number of bits to left+right shift the value in order to
        {AlignedAddr, Incr, Mask, SextShamt, Ordering});
    Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
21680if (!(Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas()) &&
21689unsigned XLen = Subtarget.
getXLen();
21691Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
21696 CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
21700 CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
21708// We have indexed loads for all supported EEW types. Indices are always 21723return Subtarget.hasStdExtZfhmin();
21725return Subtarget.hasStdExtF();
21727return Subtarget.hasStdExtD();
21734// If we are using the small code model, we can reduce size of jump table 21735// entry to 4 bytes. 21752// We define vscale to be VLEN/RVVBitsPerBlock. VLEN is always a power 21753// of two >= 64, and RVVBitsPerBlock is 64. Thus, vscale must be 21754// a power of two as well. 21755// FIXME: This doesn't work for zve32, but that's already broken 21756// elsewhere for the same reason. 21759"RVVBitsPerBlock changed, audit needed");
  // Target does not support indexed loads.
  if (!Subtarget.hasVendorXTHeadMemIdx())

  int64_t RHSC = RHS->getSExtValue();

  // The constants that can be encoded in the THeadMemIdx instructions
  // are of the form (sign_extend(imm5) << imm2).
  bool isLegalIndexedOffset = false;
  for (unsigned i = 0; i < 4; i++)
    if (isInt<5>(RHSC >> i) && ((RHSC % (1LL << i)) == 0)) {
      isLegalIndexedOffset = true;

  if (!isLegalIndexedOffset)

    VT = LD->getMemoryVT();
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    VT = ST->getMemoryVT();
    Ptr = ST->getBasePtr();
  }

  if (Subtarget.hasVendorXCVmem() && !Subtarget.is64Bit()) {
    Base = LS->getBasePtr();
    if (Base == Op->getOperand(0))
    else if (Base == Op->getOperand(1))

    VT = LD->getMemoryVT();
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    VT = ST->getMemoryVT();
    Ptr = ST->getBasePtr();
  }

  // Post-indexing updates the base, so it's not a valid transform
  // if that's not the same as the load's pointer.

  // Zacas will use amocas.w which does not require extension.
                                               const Constant *PersonalityFn) const {

                                               const Constant *PersonalityFn) const {

// Return false to suppress the unnecessary extensions if the LibCall
// arguments or return value is a float narrower than XLEN on a soft FP ABI.

                                                  bool IsSigned) const {

  // Check integral scalar types.

  // Omit the optimization if the sub target has the M extension and the data
  // size exceeds XLen.
  const bool HasZmmul = Subtarget.hasStdExtZmmul();
    auto *ConstNode = cast<ConstantSDNode>(C);
    const APInt &Imm = ConstNode->getAPIntValue();

    // Break the MUL to a SLLI and an ADD/SUB.
    if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
        (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())

    // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.
    if (Subtarget.hasStdExtZba() && !Imm.isSignedIntN(12) &&
        ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
         (Imm - 8).isPowerOf2()))

    // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
    // a pair of LUI/ADDI.
    if (!Imm.isSignedIntN(12) && Imm.countr_zero() < 12 &&
        ConstNode->hasOneUse()) {
      APInt ImmS = Imm.ashr(Imm.countr_zero());
      if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
          (1 - ImmS).isPowerOf2())
isSignedIntN(12) && !(C1 * C2).isSignedIntN(12))
21981// Default to true and let the DAGCombiner decide. 21987unsigned *
Fast)
const{
21990 *
Fast = Subtarget.enableUnalignedScalarMem();
21991return Subtarget.enableUnalignedScalarMem();
21994// All vector implementations must support element alignment 22002// Note: We lower an unmasked unaligned vector access to an equally sized 22003// e8 element type access. Given this, we effectively support all unmasked 22004// misaligned accesses. TODO: Work through the codegen implications of 22005// allowing such accesses to be formed, and considered fast. 22007 *
Fast = Subtarget.enableUnalignedVectorMem();
22008return Subtarget.enableUnalignedVectorMem();
22017if (FuncAttributes.
hasFnAttr(Attribute::NoImplicitFloat))
22020// We use LMUL1 memory operations here for a non-obvious reason. Our caller 22021// has an expansion threshold, and we want the number of hardware memory 22022// operations to correspond roughly to that threshold. LMUL>1 operations 22023// are typically expanded linearly internally, and thus correspond to more 22024// than one actual memory operation. Note that store merging and load 22025// combining will typically form larger LMUL operations from the LMUL1 22026// operations emitted here, and that's okay because combining isn't 22027// introducing new memory operations; it's just merging existing ones. 22029if (
Op.size() < MinVLenInBytes)
22030// TODO: Figure out short memops. For the moment, do the default thing 22031// which ends up using scalar sequences. 22034// If the minimum VLEN is less than RISCV::RVVBitsPerBlock we don't support 22039// Prefer i8 for non-zero memset as it allows us to avoid materializing 22040// a large scalar constant and instead use vmv.v.x/i to do the 22041// broadcast. For everything else, prefer ELenVT to minimize VL and thus 22042// maximize the chance we can encode the size in the vsetvli. 22044MVT PreferredVT = (
Op.isMemset() && !
Op.isZeroMemset()) ? MVT::i8 : ELenVT;
22046// Do we have sufficient alignment for our preferred VT? If not, revert 22047// to largest size allowed by our alignment criteria. 22048if (PreferredVT != MVT::i8 && !Subtarget.enableUnalignedVectorMem()) {
22050if (
Op.isFixedDstAlign())
22051 RequiredAlign = std::min(RequiredAlign,
Op.getDstAlign());
22053 RequiredAlign = std::min(RequiredAlign,
Op.getSrcAlign());
    unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
  bool IsABIRegCopy = CC.has_value();

  MVT PairVT = Subtarget.is64Bit() ? MVT::i128 : MVT::i64;
  if ((ValueVT == PairVT ||
       (!Subtarget.is64Bit() && Subtarget.hasStdExtZdinx() &&
        ValueVT == MVT::f64)) &&
      NumParts == 1 && PartVT == MVT::Untyped) {
    // Pairs in Inline Assembly, f64 in Inline assembly on rv32_zdinx
    if (ValueVT == MVT::f64)

    // Always creating an MVT::Untyped part, so always use
    // RISCVISD::BuildGPRPair.
  }

  if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
      PartVT == MVT::f32) {
    // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
    // nan, and cast to f32.
  }

  [[maybe_unused]] unsigned ValLMUL =
  [[maybe_unused]] unsigned PartLMUL =
  assert(ValNF == PartNF && ValLMUL == PartLMUL &&
         "RISC-V vector tuple type only accepts same register class type "

  if (PartVTBitSize % ValueVTBitSize == 0) {
    assert(PartVTBitSize >= ValueVTBitSize);
    // If the element types are different, bitcast to the same element type of
    // PartVT first.
    // For example, to copy a <vscale x 1 x i8> value to <vscale x 4 x i16>,
    // we need to convert <vscale x 1 x i8> to <vscale x 8 x i8> by insert
    // subvector, then we can bitcast to <vscale x 4 x i16>.
    if (ValueEltVT != PartEltVT) {
      if (PartVTBitSize > ValueVTBitSize) {
        assert(Count != 0 && "The number of elements should not be zero.");
EVT ValueVT, std::optional<CallingConv::ID>
CC)
const{
22156bool IsABIRegCopy =
CC.has_value();
22158MVT PairVT = Subtarget.
is64Bit() ? MVT::i128 : MVT::i64;
22159if ((ValueVT == PairVT ||
22160 (!Subtarget.
is64Bit() && Subtarget.hasStdExtZdinx() &&
22161 ValueVT == MVT::f64)) &&
22162 NumParts == 1 && PartVT == MVT::Untyped) {
22163// Pairs in Inline Assembly, f64 in Inline assembly on rv32_zdinx 22167// Always starting with an MVT::Untyped part, so always use 22168// RISCVISD::SplitGPRPair 22173if (ValueVT == MVT::f64)
22178if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
22179 PartVT == MVT::f32) {
22182// Cast the f32 to i32, truncate to i16, and cast back to [b]f16. 22196if (PartVTBitSize % ValueVTBitSize == 0) {
22197assert(PartVTBitSize >= ValueVTBitSize);
22198EVT SameEltTypeVT = ValueVT;
22199// If the element types are different, convert it to the same element type 22201// Give an example here, we want copy a <vscale x 1 x i8> value from 22202// <vscale x 4 x i16>. 22203// We need to convert <vscale x 4 x i16> to <vscale x 8 x i8> first, 22204// then we can extract <vscale x 1 x i8>. 22205if (ValueEltVT != PartEltVT) {
22207assert(Count != 0 &&
"The number of element should not be zero.");
  // When aggressively optimizing for code size, we prefer to use a div
  // instruction, as it is usually smaller than the alternative sequence.
  // TODO: Add vector division?
  bool OptSize = Attr.hasFnAttr(Attribute::MinSize);

  // Scalarizing zero_ext and sign_ext might stop them from matching widening
  // instructions in some cases.
  unsigned Opc = N->getOpcode();
  // Fuchsia provides a fixed TLS slot for the stack cookie.
  // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.

  // Android provides a fixed TLS slot for the stack cookie. See the definition
  // of TLS_SLOT_STACK_GUARD in
  // https://android.googlesource.com/platform/bionic/+/main/libc/platform/bionic/tls_defines.h

  if (M->getStackProtectorGuard() == "tls") {
    // Users must specify the offset explicitly
    int Offset = M->getStackProtectorGuardOffset();
  // Don't lower vlseg/vsseg for vector types that can't be split.
  // ...

  if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
    // Sometimes the interleaved access pass picks up splats as interleaves of
    // one element. Don't lower these.
    if (FVTy->getNumElements() < 2)
      return false;
    // ...
  }

  // The intrinsics for scalable vectors are not overloaded on pointer type
  // and can only handle the default address space.
  // ...

  // Need to make sure that EMUL * NFIELDS ≤ 8.
  return Factor * LMUL <= 8;
                                                  Align Alignment) const {
  // Only support fixed vectors if we know the minimum vector size.
  // ...

  if (!Subtarget.enableUnalignedVectorMem() &&
      /* ... */)
static const Intrinsic::ID FixedVlsegIntrIds[] = {
    Intrinsic::riscv_seg2_load, Intrinsic::riscv_seg3_load,
    Intrinsic::riscv_seg4_load, Intrinsic::riscv_seg5_load,
    Intrinsic::riscv_seg6_load, Intrinsic::riscv_seg7_load,
    Intrinsic::riscv_seg8_load};
/// Lower an interleaved load into a vlsegN intrinsic.
///
/// E.g. Lower an interleaved load (Factor = 2):
///   %wide.vec = load <8 x i32>, <8 x i32>* %ptr
///   %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6>  ; Extract even elements
///   %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7>  ; Extract odd elements
///
/// Into:
///   %ld2 = { <4 x i32>, <4 x i32> } call llvm.riscv.seg2.load.v4i32.p0.i64(
///                                     %ptr, i64 4)
///   %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0
///   %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1
bool RISCVTargetLowering::lowerInterleavedLoad(
    LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
    ArrayRef<unsigned> Indices, unsigned Factor) const {
  // ...
  auto *VTy = cast<FixedVectorType>(Shuffles[0]->getType());
  // ...

  // If the segment load is going to be performed segment at a time anyways
  // and there's only one element used, use a strided load instead. This
  // will be equally fast, and create less vector register pressure.
  // ...
  unsigned ScalarSizeInBytes = VTy->getScalarSizeInBits() / 8;
  Value *Stride = ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes);
  Value *Offset = ConstantInt::get(XLenTy, Indices[0] * ScalarSizeInBytes);
  // ...
  CallInst *CI = Builder.CreateIntrinsic(
      Intrinsic::experimental_vp_strided_load,
      {VTy, BasePtr->getType(), Stride->getType()},
      {BasePtr, Stride, Mask, VL});
  // ...
  Shuffles[0]->replaceAllUsesWith(CI);
  // ...

  Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
  // ...
  for (unsigned i = 0; i < Shuffles.size(); i++) {
    // ...
    Shuffles[i]->replaceAllUsesWith(SubVec);
  }
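// Illustrative example (editorial addition, not from the upstream source):
// for a factor-4 interleave of i32 where only index 0 is extracted, e.g.
//   %v = shufflevector <16 x i32> %wide, <16 x i32> poison,
//                      <4 x i32> <i32 0, i32 4, i32 8, i32 12>
// the path above uses a strided load with Stride = 4 * 4 = 16 bytes and
// Offset = 0 instead of loading all four segments.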
static const Intrinsic::ID FixedVssegIntrIds[] = {
    Intrinsic::riscv_seg2_store, Intrinsic::riscv_seg3_store,
    Intrinsic::riscv_seg4_store, Intrinsic::riscv_seg5_store,
    Intrinsic::riscv_seg6_store, Intrinsic::riscv_seg7_store,
    Intrinsic::riscv_seg8_store};
/// Lower an interleaved store into a vssegN intrinsic.
///
/// E.g. Lower an interleaved store (Factor = 3):
///   %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
///            <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
///   store <12 x i32> %i.vec, <12 x i32>* %ptr
///
/// Into:
///   %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
///   %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
///   %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
///   call void llvm.riscv.seg3.store.v4i32.p0.i64(%sub.v0, %sub.v1, %sub.v2,
///                                                %ptr, i64 4)
///
/// Note that the new shufflevectors will be removed and we'll only generate one
/// vsseg3 instruction in CodeGen.
bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
                                                ShuffleVectorInst *SVI,
                                                unsigned Factor) const {
  // ...
  auto *ShuffleVTy = cast<FixedVectorType>(SVI->getType());
  // Given SVI : <n*factor x ty>, then VTy : <n x ty>
  auto *VTy = FixedVectorType::get(ShuffleVTy->getElementType(),
                                   ShuffleVTy->getNumElements() / Factor);
  if (!isLegalInterleavedAccessType(VTy, Factor, SI->getAlign(),
                                    SI->getPointerAddressSpace(),
                                    SI->getDataLayout()))
    return false;
  // ...

  // If the segment store only has one active lane (i.e. the interleave is
  // just a spread shuffle), we can use a strided store instead. This will
  // be equally fast, and create less vector register pressure.
  // ...
  unsigned ScalarSizeInBytes = ShuffleVTy->getScalarSizeInBits() / 8;
  // ...
  auto *DataVTy = cast<FixedVectorType>(Data->getType());
  Value *Stride = ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes);
  Value *Offset = ConstantInt::get(XLenTy, Index * ScalarSizeInBytes);
  // ...
  Builder.CreateIntrinsic(Intrinsic::experimental_vp_strided_store,
                          {Data->getType(), BasePtr->getType(),
                           Stride->getType()},
                          {Data, BasePtr, Stride, Mask, VL});
  // ...

  Function *VssegNFunc = Intrinsic::getOrInsertDeclaration(
      SI->getModule(), FixedVssegIntrIds[Factor - 2],
      {VTy, SI->getPointerOperandType(), XLenTy});
  // ...
  for (unsigned i = 0; i < Factor; i++) {
    // ...
  }

  // This VL should be OK (should be executable in one vsseg instruction,
  // potentially under larger LMULs) because we checked that the fixed vector
  // type fits in isLegalInterleavedAccessType.
  Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
  Ops.append({SI->getPointerOperand(), VL});
bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(
    LoadInst *LI, ArrayRef<Value *> DeinterleaveValues) const {
  unsigned Factor = DeinterleaveValues.size();
  // ...
  auto *ResVTy = cast<VectorType>(DeinterleaveValues[0]->getType());
  // ...

  if (auto *FVTy = dyn_cast<FixedVectorType>(ResVTy)) {
    Value *VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
    // ...
  } else {
    static const Intrinsic::ID IntrIds[] = {
        Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
        Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
        Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
        Intrinsic::riscv_vlseg8};

    unsigned SEW = DL.getTypeSizeInBits(ResVTy->getElementType());
    unsigned NumElts = ResVTy->getElementCount().getKnownMinValue();
    // The segment load returns a "riscv.vector.tuple" whose fields each cover
    // NumElts * SEW / 8 (scalable) bytes.
    // ...

    Builder.CreateIntrinsic(
        IntrIds[Factor - 2], {VecTupTy, XLenTy},
        {/* ... */ ConstantInt::get(XLenTy, Log2_64(SEW))});
    // ...
    for (unsigned i = 0; i < Factor; ++i) {
      Builder.CreateIntrinsic(Intrinsic::riscv_tuple_extract,
                              {ResVTy, VecTupTy}, {/* ... */});
      // ...
    }
  }

  // We have to create a brand new ExtractValue to replace each
  // of these old ExtractValue instructions.
  // ...
}

bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(
    StoreInst *SI, ArrayRef<Value *> InterleaveValues) const {
  unsigned Factor = InterleaveValues.size();
  auto *InVTy = cast<VectorType>(InterleaveValues[0]->getType());
  // ...
  if (!isLegalInterleavedAccessType(InVTy, Factor, SI->getAlign(),
                                    SI->getPointerAddressSpace(), DL))
    return false;
  // ...

  if (auto *FVTy = dyn_cast<FixedVectorType>(InVTy)) {
    Function *VssegNFunc = Intrinsic::getOrInsertDeclaration(
        SI->getModule(), FixedVssegIntrIds[Factor - 2],
        {InVTy, SI->getPointerOperandType(), XLenTy});

    SmallVector<Value *> Ops(InterleaveValues.begin(), InterleaveValues.end());
    Value *VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
    Ops.append({SI->getPointerOperand(), VL});
    Builder.CreateCall(VssegNFunc, Ops);
  } else {
    static const Intrinsic::ID IntrIds[] = {
        Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
        Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
        Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
        Intrinsic::riscv_vsseg8};

    unsigned SEW = DL.getTypeSizeInBits(InVTy->getElementType());
    unsigned NumElts = InVTy->getElementCount().getKnownMinValue();
    Type *VecTupTy = TargetExtType::get(
        SI->getContext(), "riscv.vector.tuple",
        ScalableVectorType::get(Type::getInt8Ty(SI->getContext()),
                                NumElts * SEW / 8),
        Factor);

    Function *VssegNFunc = Intrinsic::getOrInsertDeclaration(
        SI->getModule(), IntrIds[Factor - 2], {VecTupTy, XLenTy});
    // ...

    Value *StoredVal = PoisonValue::get(VecTupTy);
    for (unsigned i = 0; i < Factor; ++i)
      StoredVal = Builder.CreateIntrinsic(
          Intrinsic::riscv_tuple_insert, {VecTupTy, InVTy},
          {StoredVal, InterleaveValues[i], Builder.getInt32(i)});

    Builder.CreateCall(VssegNFunc, {StoredVal, SI->getPointerOperand(), VL,
                                    ConstantInt::get(XLenTy, Log2_64(SEW))});
  }
  return true;
}
22616"Invalid call instruction for a KCFI check");
22618MBBI->getOpcode()));
22621Target.setIsRenamable(
false);
22629#define GET_REGISTER_MATCHER 22630#include "RISCVGenAsmMatcher.inc" 22636if (Reg == RISCV::NoRegister)
22638if (Reg == RISCV::NoRegister)
  const MDNode *NontemporalInfo = I.getMetadata(LLVMContext::MD_nontemporal);

  if (NontemporalInfo == nullptr)
    return MachineMemOperand::MONone;

  // 1 (the default value) works as __RISCV_NTLH_ALL
  // 2 -> __RISCV_NTLH_INNERMOST_PRIVATE
  // 3 -> __RISCV_NTLH_ALL_PRIVATE
  // 4 -> __RISCV_NTLH_INNERMOST_SHARED
  // 5 -> __RISCV_NTLH_ALL
  int NontemporalLevel = 5;
  const MDNode *RISCVNontemporalInfo =
      I.getMetadata("riscv-nontemporal-domain");
  if (RISCVNontemporalInfo != nullptr)
    NontemporalLevel =
        cast<ConstantAsMetadata>(RISCVNontemporalInfo->getOperand(0))
            /* ... */;

  assert((1 <= NontemporalLevel && NontemporalLevel <= 5) &&
         "RISC-V target doesn't support this non-temporal domain.");

  NontemporalLevel -= 2;
  MachineMemOperand::Flags Flags = MachineMemOperand::MONone;
  if (NontemporalLevel & 0b1)
    Flags |= MONontemporalBit0;
  if (NontemporalLevel & 0b10)
    Flags |= MONontemporalBit1;
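// Worked example (editorial addition, not from the upstream source): after
// the "-= 2" adjustment the two low bits select the NTL hint bits. A domain
// value of 5 (__RISCV_NTLH_ALL) becomes 3, so both nontemporal bits are set,
// while 2 (__RISCV_NTLH_INNERMOST_PRIVATE) becomes 0 and neither bit is set.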
  return isTypeLegal(VT) && Subtarget.hasStdExtZvbb();

  return Subtarget.hasStdExtZbb() &&
         /* ... */;

  if (Subtarget.hasStdExtZalasr()) {
    if (Subtarget.hasStdExtZtso()) {
      // Zalasr + TSO means that atomic_load_acquire and atomic_store_release
      // should be lowered to plain load/store. The easiest way to do this is
      // to say we should insert fences for them, and the fence insertion code
      // will just not insert any fences.
      auto *LI = dyn_cast<LoadInst>(I);
      auto *SI = dyn_cast<StoreInst>(I);
      // ...
      // Here, this is a load or store which is seq_cst, and needs a .aq or
      // .rl, therefore we shouldn't try to insert fences.
      // ...
      // Here, we are a TSO inst that isn't a seq_cst load/store.
      return isa<LoadInst>(I) || isa<StoreInst>(I);
    }
    // ...
  }

  // Note that one specific case requires fence insertion for an
  // AtomicCmpXchgInst but is handled via the RISCVZacasABIFix pass rather
  // than this hook due to limitations in the interface here.
  return isa<LoadInst>(I) || isa<StoreInst>(I);
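// Editorial note (not from the upstream source): with Zalasr a load-acquire
// can be encoded directly as lw.aq and a store-release as sw.rl, so no
// separate fences are needed for them; when Ztso is also enabled, plain loads
// and stores are already strongly ordered, which is why acquire/release
// accesses can be treated as "fenced" with no fences actually emitted.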
  // GISel support is in progress or complete for these opcodes.
  unsigned Op = Inst.getOpcode();
  if (Op == Instruction::Add || Op == Instruction::Sub ||
      Op == Instruction::And || Op == Instruction::Or ||
      Op == Instruction::Xor || Op == Instruction::InsertElement ||
      Op == Instruction::ShuffleVector || Op == Instruction::Load ||
      Op == Instruction::Freeze || Op == Instruction::Store)
    return false;
  // ...
  if (/* ... */ &&
      !isa<ReturnInst>(&Inst))
    return true;

  if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
    if (AI->getAllocatedType()->isScalableTy())
      return true;
  }
SDValue
RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
                                   SelectionDAG &DAG,
                                   SmallVectorImpl<SDNode *> &Created) const {
  // ...
    return SDValue(N, 0); // Lower SDIV as SDIV

  // Only perform this transform if short forward branch opt is supported.
  if (!Subtarget.hasShortForwardBranchOpt())
    return SDValue();
  EVT VT = N->getValueType(0);
  if (!(VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit())))
    return SDValue();

  // Ensure 2**k-1 < 2048 so that we can just emit a single addi/addiw.
  if (Divisor.sgt(2048) || Divisor.slt(-2048))
    return SDValue();
  // ...
}

bool RISCVTargetLowering::shouldFoldSelectWithSingleBitTest(
    EVT VT, const APInt &AndMask) const {
  if (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())
    return !Subtarget.hasStdExtZbs() && AndMask.ugt(1024);
  // ...
}
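// Illustrative note (editorial addition, not from the upstream source): the
// usual signed division-by-power-of-two sequence is
//   x / 8  ==>  t = (x < 0) ? x + 7 : x;  result = t >> 3  (arithmetic shift)
// With short-forward-branch fusion the conditional "x + 7" becomes a cheap
// predicated add, and requiring 2**k - 1 < 2048 keeps the adjustment
// encodable in a single addi/addiw immediate (12-bit signed, max 2047).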
unsigned RISCVTargetLowering::getMinimumJumpTableEntries() const {
  return Subtarget.getMinimumJumpTableEntries();
}

  if (Subtarget.hasStdExtZicfilp()) {
    // When Zicfilp is enabled, we need to use a software-guarded branch for
    // the jump-table branch.
    // ...
  }

// Jump table debug info is only needed if CodeView is enabled.
// ...

// If an output pattern produces multiple instructions tablegen may pick an
// arbitrary type from an instruction's destination register class to use for
// the VT of that MachineSDNode. This VT may be used to look up the
// representative register class. If the type isn't legal, the default
// implementation will not find a register class.
//
// Some integer types smaller than XLen are listed in the GPR register class to
// support isel patterns for GISel, but are not legal in SelectionDAG. The
// arbitrary type tablegen picks may be one of these smaller types.
//
// f16 and bf16 are both valid for the FPR16 or GPRF16 register class. It's
// possible for tablegen to pick bf16 as the arbitrary type for an f16 pattern.
std::pair<const TargetRegisterClass *, uint8_t>
RISCVTargetLowering::findRepresentativeClass(/* ... */) const {
  // ...
}

namespace llvm::RISCVVIntrinsicsTable {

#define GET_RISCVVIntrinsicsTable_IMPL
#include "RISCVGenSearchableTables.inc"

} // namespace llvm::RISCVVIntrinsicsTable

// If the function specifically requests inline stack probes, emit them.
// ...

unsigned RISCVTargetLowering::getStackProbeSize(const MachineFunction &MF,
                                                Align StackAlign) const {
  // The default stack probe size is 4096 if the function has no
  // stack-probe-size attribute.
  unsigned StackProbeSize =
      MF.getFunction().getFnAttributeAsParsedInteger("stack-probe-size", 4096);
  // Round down to the stack alignment.
  StackProbeSize &= ~(StackAlign.value() - 1);
  return StackProbeSize ? StackProbeSize : StackAlign.value();
}
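// Editorial note (assumption, not from the upstream source): the per-function
// probe size typically comes from the generic LLVM attributes, e.g.
//   attributes #0 = { "probe-stack"="inline-asm" "stack-probe-size"="8192" }
// and is rounded down to the stack alignment before the probing loop below
// uses it.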
  MaybeAlign Alignment =
      cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
  EVT VT = Op.getValueType();

  // Construct the new SP value in a GPR.
  // ...

  // Set the real SP to the new value with a probing loop.
  // ...

  Register TargetReg = MI.getOperand(1).getReg();
  // ...
  bool IsRV64 = Subtarget.is64Bit();
  // ...
  MF.insert(MBBInsertPoint, LoopTestMBB);
  // ...
  MF.insert(MBBInsertPoint, ExitMBB);
  // ...

  // ScratchReg = ProbeSize
  // ...
  // SUB SP, SP, ProbeSize
  // ...
  // s[d|w] zero, 0(sp)
  BuildMI(/* ... */, TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
      /* ... */;
  // BLT TargetReg, SP, LoopTest
  // ...
  // Adjust with: MV SP, TargetReg.
  // ...

  MI.eraseFromParent();
  return ExitMBB->begin()->getParent();
unsigned const MachineRegisterInfo * MRI
static MCRegister MatchRegisterName(StringRef Name)
static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT)
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget, const AArch64TargetLowering &TLI)
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue tryWidenMaskForShuffle(SDValue Op, SelectionDAG &DAG)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
#define NODE_NAME_CASE(node)
static bool isConstant(const MachineInstr &MI)
AMDGPU Register Bank Select
static bool isZeroOrAllOnes(SDValue N, bool AllOnes)
static SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, TargetLowering::DAGCombinerInfo &DCI, bool AllOnes=false)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static MCRegister MatchRegisterAltName(StringRef Name)
Maps from the set of all alternative registernames to a register number.
Function Alias Analysis Results
static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, unsigned Flags)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static MachineBasicBlock * emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode)
static SDValue unpackFromRegLoc(const CSKYSubtarget &Subtarget, SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
const HexagonInstrInfo * TII
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG)
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG)
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
loop Loop Strength Reduction
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
mir Rename Register Operands
unsigned const TargetRegisterInfo * TRI
This file provides utility analysis objects describing memory locations.
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, const TargetLowering::DAGCombinerInfo &DCI, const MipsSETargetLowering *TL, const MipsSubtarget &Subtarget)
static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, const MipsSubtarget &Subtarget)
static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
static StringRef getName(Value *V)
static constexpr Register SPReg
static StringRef getExtensionType(StringRef Ext)
static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB, unsigned RelOpcode, unsigned EqOpcode, const RISCVSubtarget &Subtarget)
static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef< int > Mask)
Match shuffles that concatenate two vectors, rotate the concatenation, and then extract the original ...
static const Intrinsic::ID FixedVlsegIntrIds[]
static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MVT getLMUL1VT(MVT VT)
static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
Match v(f)slide1up/down idioms.
static bool hasPassthruOp(unsigned Opcode)
Return true if a RISC-V target specified op has a passthru operand.
static SDValue combineTruncToVnclip(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< APInt > getExactInteger(const APFloat &APF, uint32_t BitWidth)
static bool isInterleaveShuffle(ArrayRef< int > Mask, MVT VT, int &EvenSrc, int &OddSrc, const RISCVSubtarget &Subtarget)
Is this shuffle interleaving contiguous elements from one vector into the even elements and contiguou...
static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG)
According to the property that indexed load/store instructions zero-extend their indices,...
static unsigned getPACKOpcode(unsigned DestBW, const RISCVSubtarget &Subtarget)
static void promoteVCIXScalar(const SDValue &Op, SmallVectorImpl< SDValue > &Operands, SelectionDAG &DAG)
static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Scalar, SDValue VL, SelectionDAG &DAG)
static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Lo, SDValue Hi, SDValue VL, SelectionDAG &DAG)
static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL, SelectionDAG &DAG)
Creates an all ones mask suitable for masking a vector of type VecTy with vector length VL.
static cl::opt< int > FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden, cl::desc("Give the maximum number of instructions that we will " "use for creating a floating-point immediate value"), cl::init(2))
static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static const RISCV::RISCVMaskedPseudoInfo * lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVII::VLMUL LMul, unsigned SEW)
static SDValue expandMul(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue performVWADDSUBW_VLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask, Align BaseAlign, const RISCVSubtarget &ST)
Match the index of a gather or scatter operation as an operation with twice the element width and hal...
static bool isLegalBitRotate(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, MVT &RotateVT, unsigned &RotateAmt)
static SDValue combineOp_VLToVWOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
Combine a binary or FMA operation to its equivalent VW or VW_W form.
static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG)
static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1, SelectionDAG &DAG)
static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerDisjointIndicesShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Given a shuffle where the indices are disjoint between the two sources, e.g.:
static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG)
static MachineBasicBlock * EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second, MachineBasicBlock *ThisMBB, const RISCVSubtarget &Subtarget)
static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue lowerFABSorFNEG(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG)
static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG)
static SDValue performMemPairCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static unsigned getRVVReductionOp(unsigned ISDOpcode)
static SDValue combineSubShiftToOrcB(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< bool > matchSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue Val)
static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG, unsigned Type)
static SDValue lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static cl::opt< unsigned > NumRepeatedDivisors(DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden, cl::desc("Set the minimum number of repetitions of a divisor to allow " "transformation to multiplications by the reciprocal"), cl::init(2))
static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG)
static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineVectorMulToSraBitcast(SDNode *N, SelectionDAG &DAG)
static SDValue combineScalarCTPOPToVCPOP(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool hasMaskOp(unsigned Opcode)
Return true if a RISC-V target specified op has a mask operand.
static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index, ISD::MemIndexType &IndexType, RISCVTargetLowering::DAGCombinerInfo &DCI)
static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isSpreadMask(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
static unsigned getRISCVVLOp(SDValue Op)
Get a RISC-V target specified VL op for a given SDNode.
static unsigned getVecReduceOpcode(unsigned Opc)
Given a binary operator, return the associative generic ISD::VECREDUCE_OP which corresponds to it.
static std::pair< SDValue, SDValue > getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isPromotedOpNeedingSplit(SDValue Op, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INT_SATCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT, SDValue StartValue, SDValue Vec, SDValue Mask, SDValue VL, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Helper to lower a reduction sequence of the form: scalar = reduce_op vec, scalar_start.
static SDValue performVP_REVERSECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::pair< SDValue, SDValue > getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getVLOperand(SDValue Op)
static SDValue performVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue performVP_STORECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB, const RISCVSubtarget &Subtarget)
static SDValue getLargeExternalSymbol(ExternalSymbolSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG)
static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVectorXRINT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static cl::opt< unsigned > ExtensionMaxWebSize(DEBUG_TYPE "-ext-max-web-size", cl::Hidden, cl::desc("Give the maximum size (in number of nodes) of the web of " "instructions that we will consider for VW expansion"), cl::init(18))
static SDValue getDeinterleaveShiftAndTrunc(const SDLoc &DL, MVT VT, SDValue Src, unsigned Factor, unsigned Index, SelectionDAG &DAG)
static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG)
static bool matchSelectAddSub(SDValue TrueVal, SDValue FalseVal, bool &SwapCC)
static bool isSelectPseudo(MachineInstr &MI)
static std::optional< MVT > getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool useRVVForFixedLengthVectorVT(MVT VT, const RISCVSubtarget &Subtarget)
static Value * useTpOffset(IRBuilderBase &IRB, unsigned Offset)
static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG)
static SDValue combineTruncOfSraSext(SDNode *N, SelectionDAG &DAG)
static SDValue getSingleShuffleSrc(MVT VT, MVT ContainerVT, SDValue V1, SDValue V2)
static MachineBasicBlock * emitSplitF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI, MachineBasicBlock *BB, unsigned CVTXOpc)
static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG)
static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc)
static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG, SDValue TrueVal, SDValue FalseVal, bool Swapped)
static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask, SmallVector< int > &ShuffleMask)
Match the index vector of a scatter or gather node as the shuffle mask which performs the rearrangeme...
static SDValue performVFMADD_VLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG)
static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static void processVCIXOperands(SDValue &OrigOp, SmallVectorImpl< SDValue > &Operands, SelectionDAG &DAG)
static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL, SelectionDAG &DAG)
static SDValue lowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< VIDSequence > isSimpleVIDSequence(SDValue Op, unsigned EltSizeInBits)
static SDValue getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC)
static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc)
static SDValue lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineTruncSelectToSMaxUSat(SDNode *N, SelectionDAG &DAG)
static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG)
static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isValidEGW(int EGS, EVT VT, const RISCVSubtarget &Subtarget)
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isNonZeroAVL(SDValue AVL)
static SDValue lowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG, unsigned Type)
static SDValue getLargeGlobalAddress(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG)
static MachineBasicBlock * emitReadCounterWidePseudo(MachineInstr &MI, MachineBasicBlock *BB)
static SDValue getWideningSpread(SDValue V, unsigned Factor, unsigned Index, const SDLoc &DL, SelectionDAG &DAG)
static cl::opt< bool > AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden, cl::desc("Allow the formation of VW_W operations (e.g., " "VWADD_W) with splat constants"), cl::init(false))
static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const CCValAssign &HiVA, const SDLoc &DL)
static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1, LSBaseSDNode *LSNode2, SDValue BasePtr, uint64_t Imm)
static std::tuple< unsigned, SDValue, SDValue > getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Perform two related transforms whose purpose is to incrementally recognize an explode_vector followed...
static SDValue lowerBuildVectorViaPacking(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Double the element size of the build vector to reduce the number of vslide1down in the build vector c...
static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Try and optimize BUILD_VECTORs with "dominant values" - these are values which constitute a large pro...
static bool isCompressMask(ArrayRef< int > Mask)
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG)
static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
If we have a build_vector where each lane is binop X, C, where C is a constant (but not necessarily t...
static const Intrinsic::ID FixedVssegIntrIds[]
static SDValue getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static LLT getMaskTypeFor(LLT VecTy)
Return the type of the mask type suitable for masking the provided vector type.
static unsigned getRISCVWOpcode(unsigned Opcode)
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static Type * getValueType(Value *V)
Returns the type of the given value/instruction V.
static bool isCommutative(Instruction *I)
This file defines the SmallSet class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static SymbolRef::Type getType(const Symbol *Sym)
static constexpr int Concat[]
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Class for arbitrary precision integers.
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
uint64_t getZExtValue() const
Get zero extended value.
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
unsigned getActiveBits() const
Compute the number of active bits in the value.
APInt trunc(unsigned width) const
Truncate to new width.
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
bool sgt(const APInt &RHS) const
Signed greater than comparison.
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
bool isNegative() const
Determine sign of this APInt.
APInt sdiv(const APInt &RHS) const
Signed division function for APInt.
void clearAllBits()
Set every bit to 0.
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
APInt srem(const APInt &RHS) const
Function for signed remainder operation.
bool isMask(unsigned numBits) const
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
APInt sext(unsigned width) const
Sign extend to a new width.
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
bool slt(const APInt &RHS) const
Signed less than comparison.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
int64_t getSExtValue() const
Get sign extended value.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
An arbitrary precision integer that knows its signedness.
an instruction to allocate memory on the stack
This class represents an incoming formal argument to a Function.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
An instruction that atomically checks whether a specified value is in a memory location,...
Value * getCompareOperand()
an instruction that atomically reads a memory location, combines it with another value,...
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ USubCond
Subtract only if no unsigned overflow.
@ Min
*p = old <signed v ? old : v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ UMax
*p = old >unsigned v ? old : v
@ UDecWrap
Decrement one until a minimum value or zero.
bool isFloatingPointOperation() const
BinOp getOperation() const
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
StringRef getValueAsString() const
Return the attribute's value as a string.
static Attribute getWithAlignment(LLVMContext &Context, Align Alignment)
Return a uniquified Attribute object that has the specific alignment set.
static BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
LLVM Basic Block Representation.
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
bool test(unsigned Idx) const
bool all() const
all - Returns true if all bits are set.
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
CCValAssign - Represent assignment of one arg/retval to a location.
Register getLocReg() const
LocInfo getLocInfo() const
int64_t getLocMemOffset() const
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
bool isIndirectCall() const
Return true if the callsite is an indirect call.
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
Adds the attribute to the indicated argument.
This class represents a function call, abstracting a target machine's calling convention.
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
This is the shared class of boolean and integer constants.
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
This is an important base class in LLVM.
static Constant * getAllOnesValue(Type *Ty)
This class represents an Operation in the Expression.
uint64_t getNumOperands() const
A parsed version of the target data layout string in and methods for querying it.
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits FIXME: The defaults need to be removed once all of the backends/clients ...
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
const ValueT & at(const_arg_type_t< KeyT > Val) const
at - Return the entry for the specified key, or abort if no such entry exists.
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Implements a dense probed hash-table based set.
Diagnostic information for unsupported feature in backend.
static constexpr ElementCount getScalable(ScalarTy MinVal)
static constexpr ElementCount getFixed(ScalarTy MinVal)
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Class to represent function types.
Type * getParamType(unsigned i) const
Parameter type accessors.
Type * getReturnType() const
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
AttributeList getAttributes() const
Return the attribute list for this Function.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Argument * getArg(unsigned i) const
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Helper struct to store a base, index and offset that forms an address.
int64_t getOffset() const
bool hasExternalWeakLinkage() const
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
Store the specified register of the given register class to the specified stack frame index.
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
Load the specified register of the given register class from the specified stack frame index.
Common base class shared among various IRBuilders.
Value * CreateConstGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0, const Twine &Name="")
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
FenceInst * CreateFence(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System, const Twine &Name="")
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Value * CreatePtrAdd(Value *Ptr, Value *Offset, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
BasicBlock * GetInsertBlock() const
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Value * getAllOnesMask(ElementCount NumElts)
Return an all true boolean vector (mask) with NumElts lanes.
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Value * CreateNot(Value *V, const Twine &Name="")
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
ConstantInt * getIntN(unsigned N, uint64_t C)
Get a constant N-bit value, zero extended or truncated from a 64-bit value.
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
AtomicRMWInst * CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val, MaybeAlign Align, AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
static InstructionCost getInvalid(CostType Val=0)
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Class to represent integer types.
This is an important class for using LLVM in a threaded context.
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Base class for LoadSDNode and StoreSDNode.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
An instruction for reading from memory.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Value * getPointerOperand()
Type * getPointerOperandType() const
Align getAlign() const
Return the alignment of the access that is being performed.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
static constexpr LocationSize beforeOrAfterPointer()
Any location before or after the base pointer (but still within the underlying object).
Context object for machine code objects.
Base class for the full range of assembler expressions which are needed for parsing.
Instances of this class represent a single low-level machine instruction.
MCContext & getContext() const
Generic base class for all target subtargets.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
const MDOperand & getOperand(unsigned I) const
static MVT getFloatingPointVT(unsigned BitWidth)
static auto integer_fixedlen_vector_valuetypes()
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
bool isRISCVVectorTuple() const
Return true if this is a RISCV vector tuple type where the runtime length is machine dependent.
uint64_t getScalarSizeInBits() const
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
bool bitsLE(MVT VT) const
Return true if this has no more bits than VT.
unsigned getVectorNumElements() const
static MVT getRISCVVectorTupleVT(unsigned Sz, unsigned NFields)
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
static MVT getScalableVectorVT(MVT VT, unsigned NumElements)
unsigned getRISCVVectorTupleNumFields() const
Given a RISC-V vector tuple type, return the num_fields.
MVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type.
static MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
bool bitsLT(MVT VT) const
Return true if this has less bits than VT.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
bool bitsGT(MVT VT) const
Return true if this has more bits than VT.
bool isFixedLengthVector() const
ElementCount getVectorElementCount() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool bitsGE(MVT VT) const
Return true if this has no less bits than VT.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
bool isValid() const
Return true if this is a valid simple valuetype.
static MVT getIntegerVT(unsigned BitWidth)
MVT getDoubleNumVectorElementsVT() const
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
static auto integer_scalable_vector_valuetypes()
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
static auto fp_fixedlen_vector_valuetypes()
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
void push_back(MachineInstr *MI)
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
Instructions::iterator instr_iterator
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
void setFlag(MIFlag Flag)
Set a MI flag.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
@ EK_Custom32
EK_Custom32 - Each entry is a 32-bit value that is custom lowered by the TargetLowering::LowerCustomJ...
A description of a memory reference used in the backend.
const MDNode * getRanges() const
Return the range tag for the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
Flags getFlags() const
Return the raw flags of the source value,.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
Align getBaseAlign() const
Return the minimum known alignment in bytes of the base address, without the offset.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
void addLiveIn(MCRegister Reg, Register vreg=Register())
addLiveIn - Add the specified register as a live-in.
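A hedged sketch of the usual pattern behind these two calls when lowering formal arguments: record the physical argument register as a live-in and read it through a fresh virtual register (the register, type, and surrounding variables are illustrative).
MachineRegisterInfo &RegInfo = MF.getRegInfo();
Register VReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
RegInfo.addLiveIn(RISCV::X10, VReg); // a0 carries the first integer argument
SDValue Arg = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);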
This is an abstract virtual class for memory operations.
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A RISCV-specific constant pool value.
static RISCVConstantPoolValue * Create(const GlobalValue *GV)
RISCVMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private RISCV-...
void setVarArgsFrameIndex(int Index)
int getVarArgsFrameIndex() const
void setVarArgsSaveSize(int Size)
void addSExt32Register(Register Reg)
RISCVABI::ABI getTargetABI() const
unsigned getMinimumJumpTableEntries() const
bool hasStdExtCOrZca() const
unsigned getMaxLMULForFixedLengthVectors() const
bool hasVInstructionsI64() const
bool hasVInstructionsF64() const
unsigned getMaxStoresPerMemcpy(bool OptSize) const
bool hasStdExtDOrZdinx() const
unsigned getMaxLoadsPerMemcmp(bool OptSize) const
bool hasStdExtZfhOrZhinx() const
unsigned getRealMinVLen() const
unsigned getMaxStoresPerMemset(bool OptSize) const
Quantity expandVScale(Quantity X) const
If the ElementCount or TypeSize X is scalable and VScale (VLEN) is exactly known, returns X converted...
bool useRVVForFixedLengthVectors() const
bool isTargetFuchsia() const
bool hasVInstructionsBF16Minimal() const
unsigned getDLenFactor() const
unsigned getMaxStoresPerMemmove(bool OptSize) const
bool hasVInstructionsF16Minimal() const
unsigned getMaxGluedStoresPerMemcpy() const
bool hasConditionalMoveFusion() const
bool hasVInstructionsF16() const
unsigned getMaxBuildIntsCost() const
bool useCCMovInsn() const
Align getPrefLoopAlignment() const
bool hasVInstructions() const
bool isRegisterReservedByUser(Register i) const override
std::optional< unsigned > getRealVLen() const
bool hasOptimizedSegmentLoadStore(unsigned NF) const
bool useConstantPoolForLargeInts() const
Align getPrefFunctionAlignment() const
bool hasStdExtZfhminOrZhinxmin() const
unsigned getRealMaxVLen() const
const RISCVRegisterInfo * getRegisterInfo() const override
const RISCVInstrInfo * getInstrInfo() const override
const RISCVTargetLowering * getTargetLowering() const override
bool hasVInstructionsF32() const
bool isTargetAndroid() const
bool hasStdExtFOrZfinx() const
const RISCVFrameLowering * getFrameLowering() const override
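Illustrative only: the subtarget predicates listed above typically gate vector-lowering decisions along these lines (the exact checks in this file differ).
if (Subtarget.hasVInstructions() && Subtarget.useRVVForFixedLengthVectors() &&
    Subtarget.getRealMinVLen() >= 128) {
  // Fixed-length vectors can be widened into scalable RVV container types,
  // bounded by getMaxLMULForFixedLengthVectors() and the known VLEN range.
}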
static std::pair< unsigned, unsigned > computeVLMAXBounds(MVT ContainerVT, const RISCVSubtarget &Subtarget)
static std::pair< unsigned, unsigned > decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx, const RISCVRegisterInfo *TRI)
InstructionCost getVRGatherVVCost(MVT VT) const
Return the cost of a vrgather.vv instruction for the type VT.
bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const
static unsigned getSubregIndexByMVT(MVT VT, unsigned Index)
Value * getIRStackGuard(IRBuilderBase &IRB) const override
If the target has a standard location for the stack protector cookie, returns the address of that loc...
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const override
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT) const override
Return true if pulling a binary operation into a select with an identity constant is profitable.
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able to emit the call instruction as a tail call.
RISCVTargetLowering(const TargetMachine &TM, const RISCVSubtarget &STI)
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
bool lowerInterleaveIntrinsicToStore(StoreInst *SI, ArrayRef< Value * > InterleaveValues) const override
Lower an interleave intrinsic to a target specific store intrinsic.
bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Returns true if the target allows unaligned memory accesses of the specified type.
const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const override
This method returns the constant pool value that will be loaded by LD.
const RISCVSubtarget & getSubtarget() const
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool preferScalarizeSplat(SDNode *N) const override
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
const MCExpr * LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned uid, MCContext &Ctx) const override
InstructionCost getVRGatherVICost(MVT VT) const
Return the cost of a vrgather.vi (or vx) instruction for the type VT.
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Return true if it is beneficial to convert a load of a constant to just the constant itself.
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const override
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the register type for a given MVT, ensuring vectors are treated as a series of gpr sized integ...
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context, const Type *RetTy) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y ---> (~X & Y) == 0 (X & Y) !...
bool shouldScalarizeBinop(SDValue VecOp) const override
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to move this shift by a constant amount through its operand,...
bool areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX, const MemSDNode &NodeY) const override
Return true if it is valid to merge the TargetMMOFlags in two SDNodes.
bool hasBitTest(SDValue X, SDValue Y) const override
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize, unsigned MinSize)
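A worked example of the quantity computed above, assuming the usual definition VLMAX = (VLEN / SEW) * LMUL with LMUL derived from MinSize / RISCV::RVVBitsPerBlock (64 bits per block); the concrete numbers are illustrative.
// VectorBits (VLEN) = 128, EltSize (SEW) = 32, MinSize = 64 (e.g. nxv2i32):
//   LMUL  = 64 / 64        = 1
//   VLMAX = (128 / 32) * 1 = 4 elements per register group
unsigned VLMax = RISCVTargetLowering::computeVLMAX(/*VectorBits=*/128,
                                                   /*EltSize=*/32,
                                                   /*MinSize=*/64);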
bool shouldExpandCttzElements(EVT VT) const override
Return true if the @llvm.experimental.cttz.elts intrinsic should be expanded using generic code in Se...
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
InstructionCost getLMULCost(MVT VT) const
Return the cost of LMUL for linear operations.
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const override
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
InstructionCost getVSlideVICost(MVT VT) const
Return the cost of a vslidedown.vi or vslideup.vi instruction for the type VT.
bool fallBackToDAGISel(const Instruction &Inst) const override
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
bool lowerInterleavedLoad(LoadInst *LI, ArrayRef< ShuffleVectorInst * > Shuffles, ArrayRef< unsigned > Indices, unsigned Factor) const override
Lower an interleaved load into a vlsegN intrinsic.
bool isCtpopFast(EVT VT) const override
Return true if ctpop instruction is fast.
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
This method can be implemented by targets that want to expose additional information about sign bits ...
MVT getContainerForFixedLengthVector(MVT VT) const
static unsigned getRegClassIDForVecVT(MVT VT)
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
MachineBasicBlock * emitDynamicProbedAlloc(MachineInstr &MI, MachineBasicBlock *MBB) const
MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const override
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
SDValue computeVLMax(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG) const
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const override
Should we transform the IR-optimal check for whether given truncation down into KeptBits would be trun...
bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const override
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
bool hasInlineStackProbe(const MachineFunction &MF) const override
True if stack clash protection is enabled for this function.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Returns the register with the specified architectural or ABI name.
InstructionCost getVSlideVXCost(MVT VT) const
Return the cost of a vslidedown.vx or vslideup.vx instruction for the type VT.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
static unsigned getRegClassIDForLMUL(RISCVII::VLMUL LMul)
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const override
Return the maximum number of "x & (x - 1)" operations that can be done instead of deferring to a cust...
void AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const override
This method should be implemented by targets that mark instructions with the 'hasPostISelHook' flag.
bool isShuffleMaskLegal(ArrayRef< int > M, EVT VT) const override
Return true if the given shuffle mask can be codegen'd directly, or if it should be stack expanded.
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, unsigned Factor) const override
Lower an interleaved store into a vssegN intrinsic.
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const override
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
bool isLegalElementTypeForRVV(EVT ScalarTy) const
bool isVScaleKnownToBeAPowerOfTwo() const override
Return true only if vscale must be a power of two.
static RISCVII::VLMUL getLMUL(MVT VT)
int getLegalZfaFPImm(const APFloat &Imm, EVT VT) const
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the number of registers for a given MVT, ensuring vectors are treated as a series of gpr sized...
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint letter, return the type of constraint it is for this target.
MachineInstr * EmitKCFICheck(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator &MBBI, const TargetInstrInfo *TII) const override
bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace, const DataLayout &) const
Returns whether or not generating an interleaved load/store intrinsic for this type will be legal.
bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override
Return true if Op can create undef or poison from non-undef & non-poison operands.
bool isIntDivCheap(EVT VT, AttributeList Attr) const override
Return true if integer divide is usually cheaper than a sequence of several shifts,...
SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const override
Expands target specific indirect branch for the case of JumpTable expansion.
bool lowerDeinterleaveIntrinsicToLoad(LoadInst *LI, ArrayRef< Value * > DeinterleaveValues) const override
Lower a deinterleave intrinsic to a target specific load intrinsic.
unsigned getNumRegisters(LLVMContext &Context, EVT VT, std::optional< MVT > RegisterVT=std::nullopt) const override
Return the number of registers for a given MVT, for inline assembly.
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if this node ...
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if the node's...
SDValue joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, std::optional< CallingConv::ID > CC) const override
Target-specific combining of register parts into its original value.
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override
Return true if the target supports combining a chain like:
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
bool isLegalStridedLoadStore(EVT DataType, Align Alignment) const
Return true if a stride load store of the given result type and alignment is legal.
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
unsigned getStackProbeSize(const MachineFunction &MF, Align StackAlign) const
bool shouldInsertFencesForAtomic(const Instruction *I) const override
Whether AtomicExpandPass should automatically insert fences and reduce ordering for this atomic.
Wrapper class representing virtual and physical registers.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
void setCFIType(uint32_t Type)
bool isUndef() const
Return true if the node is an UNDEF value.
iterator_range< user_iterator > users()
op_iterator op_end() const
op_iterator op_begin() const
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
static ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
virtual bool isTargetStrictFPOpcode(unsigned Opcode) const
Returns true if a node with the given target-specific opcode has strict floating-point semantics.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getNeutralElement(unsigned Opcode, const SDLoc &DL, EVT VT, SDNodeFlags Flags)
Get the (commutative) neutral element for the given opcode, if it exists.
SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue getStridedLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &DL, SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, bool IsExpanding=false)
SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
SDValue getJumpTableDebugInfo(int JTI, SDValue Chain, const SDLoc &DL)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
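A minimal sketch combining this helper with getSelect: compare two values and pick one of them (the value types are illustrative; in-tree code queries getSetCCResultType for the comparison type).
SDValue Cond = DAG.getSetCC(DL, MVT::i1, LHS, RHS, ISD::SETLT);
SDValue Min  = DAG.getSelect(DL, MVT::i64, Cond, LHS, RHS);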
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getRegister(Register Reg, EVT VT)
SDValue getElementCount(const SDLoc &DL, EVT VT, ElementCount EC, bool ConstantFold=true)
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
bool shouldOptForSize() const
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
SDValue getVPZExtOrTrunc(const SDLoc &DL, EVT VT, SDValue Op, SDValue Mask, SDValue EVL)
Convert a vector-predicated Op, which must be an integer vector, to the vector-type VT,...
const TargetLowering & getTargetLoweringInfo() const
SDValue getStridedStoreVP(SDValue Chain, const SDLoc &DL, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
bool NewNodesMustHaveLegalTypes
When true, additional steps are taken to ensure that getConstant() and similar functions return DAG n...
std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
const SelectionDAGTargetInfo & getSelectionDAGInfo() const
SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
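Illustrative use of the load/store helpers: reload a value from a fixed stack object and store it through another pointer (FrameIdx, DstPtr, PtrVT, Chain, and DL are assumed to be in scope).
SDValue FIN = DAG.getFrameIndex(FrameIdx, PtrVT);
SDValue Val = DAG.getLoad(MVT::i64, DL, Chain, FIN,
                          MachinePointerInfo::getFixedStack(MF, FrameIdx));
SDValue St  = DAG.getStore(Val.getValue(1), DL, Val, DstPtr,
                           MachinePointerInfo(), Align(8));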
SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
std::pair< SDValue, SDValue > getStrictFPExtendOrRound(SDValue Op, SDValue Chain, const SDLoc &DL, EVT VT)
Convert Op, which must be a STRICT operation of float type, to the float type VT, by either extending...
std::pair< SDValue, SDValue > SplitEVL(SDValue N, EVT VecVT, const SDLoc &DL)
Split the explicit vector length parameter of a VP operation.
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
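A minimal sketch of the node factory above together with the constant helpers: build (X << 2) + 1 for a 64-bit value X already in the DAG (names are illustrative).
SDValue ShAmt = DAG.getShiftAmountConstant(2, MVT::i64, DL);
SDValue Shl   = DAG.getNode(ISD::SHL, DL, MVT::i64, X, ShAmt);
SDValue Res   = DAG.getNode(ISD::ADD, DL, MVT::i64, Shl,
                            DAG.getConstant(1, DL, MVT::i64));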
SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
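Illustrative: the known-bits queries above are typically used in combines to prove that bits are already zero before dropping a mask or an extension (Op is a placeholder value).
KnownBits Known = DAG.computeKnownBits(Op);
bool UpperZero  = DAG.MaskedValueIsZero(Op, APInt::getHighBitsSet(64, 32));
// If UpperZero, the top 32 bits of Op are proven zero and a zero-extension
// of the low 32 bits is redundant.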
LLVMContext * getContext() const
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
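Sketch of the shuffle factory above: reverse a four-element vector with a constant mask (the value type and operand V are illustrative).
int ReverseMask[] = {3, 2, 1, 0};
SDValue Rev = DAG.getVectorShuffle(MVT::v4i32, DL, V,
                                   DAG.getUNDEF(MVT::v4i32), ReverseMask);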
SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
This instruction constructs a fixed permutation of two input vectors.
static bool isSelectMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from its source vectors without lane crossings.
static bool isBitRotateMask(ArrayRef< int > Mask, unsigned EltSizeInBits, unsigned MinSubElts, unsigned MaxSubElts, unsigned &NumSubElts, unsigned &RotateAmt)
Checks if the shuffle is a bit rotation of the first operand across multiple subelements,...
VectorType * getType() const
Overload to return most specific vector type.
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index,...
static bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
static bool isReverseMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
static bool isInsertSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &NumSubElts, int &Index)
Return true if this shuffle mask is an insert subvector mask.
static bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
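A worked example for the mask predicates above: the mask <1, 3, 5, 7> selects every second element starting at index 1, so it is a de-interleave mask of factor 2 with Index == 1 (values illustrative).
int M[] = {1, 3, 5, 7};
unsigned Index;
bool IsDeinterleave =
    ShuffleVectorInst::isDeInterleaveMaskOfFactor(M, /*Factor=*/2, Index);
// IsDeinterleave == true, Index == 1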
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
static bool isSplatMask(const int *Mask, EVT VT)
int getSplatIndex() const
ArrayRef< int > getMask() const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
pointer data()
Return a pointer to the vector's buffer, even if empty().
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
This class is used to represent ISD::STORE nodes.
StringRef - Represent a constant reference to a string, i.e.
constexpr size_t size() const
size - Get the string size.
std::string lower() const
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
StringSwitch & Cases(StringLiteral S0, StringLiteral S1, T Value)
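A hedged sketch of the StringSwitch helper above, in the style of a getRegisterByName implementation; the specific name-to-register mapping is illustrative.
Register Reg = StringSwitch<Register>(RegName)
                   .Case("sp", RISCV::X2)
                   .Case("gp", RISCV::X3)
                   .Case("tp", RISCV::X4)
                   .Default(Register());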
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
static TargetExtType * get(LLVMContext &Context, StringRef Name, ArrayRef< Type * > Types={}, ArrayRef< unsigned > Ints={})
Return a target extension type having the specified name and optional type and integer parameters.
Align getStackAlign() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
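Illustrative use of this hook inside a TargetLowering constructor: the action chosen per (opcode, type) pair decides whether legalization expands the node generically or calls back into LowerOperation (the specific opcodes here are examples, not a claim about this file's configuration).
setOperationAction(ISD::BSWAP, MVT::i64, Expand);  // expand via generic code
setOperationAction(ISD::SETCC, MVT::i64, Custom);  // route to LowerOperation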
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
unsigned MaxGluedStoresPerMemcpy
Specify max number of store instructions to glue in inlined memcpy.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
Convenience method to set an operation to Promote and specify the type in a single call.
unsigned getMinCmpXchgSizeInBits() const
Returns the size of the smallest cmpxchg or ll/sc instruction the backend supports.
virtual unsigned getNumRegisters(LLVMContext &Context, EVT VT, std::optional< MVT > RegisterVT=std::nullopt) const
Return the number of registers that this ValueType will eventually require.
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
virtual bool shouldFoldSelectWithSingleBitTest(EVT VT, const APInt &AndMask) const
virtual Value * getIRStackGuard(IRBuilderBase &IRB) const
If the target has a standard location for the stack protector guard, returns the address of that loca...
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
bool EnableExtLdPromotion
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
@ ZeroOrOneBooleanContent
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
virtual std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const
Return the largest legal super-reg register class of the register class for the specified type and it...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
bool allowsMemoryAccessForAlignment(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
This function returns true if the memory access is aligned or if the target allows this specific unal...
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions. Ref: "Hacker's Delight" by Henry Warren 1...
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const
Expands target specific indirect branch for the case of JumpTable expansion.
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
Primary interface to the complete machine description for the target machine.
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
const Triple & getTargetTriple() const
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
const MCSubtargetInfo * getMCSubtargetInfo() const
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
virtual TargetLoweringObjectFile * getObjFileLowering() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual bool isRegisterReservedByUser(Register R) const
virtual const TargetInstrInfo * getInstrInfo() const
Target - Wrapper for Target specific information.
bool isOSBinFormatCOFF() const
Tests whether the OS uses the COFF binary format.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
The instances of the Type class are immutable: once they are created, they are never changed.
unsigned getIntegerBitWidth() const
Type * getStructElementType(unsigned N) const
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isStructTy() const
True if this is an instance of StructType.
bool isTargetExtTy() const
Return true if this is a target extension type.
bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
static IntegerType * getInt8Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
A Use represents the edge between a Value definition and its users.
User * getUser() const
Returns the User that contains this Use.
unsigned getOperandNo() const
Return the operand # of this use in its User.
Value * getOperand(unsigned i) const
unsigned getNumOperands() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVMContext & getContext() const
All values hold a context through their type.
Base class of all SIMD vector types.
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
constexpr ScalarTy getFixedValue() const
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
constexpr bool isZero() const
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ RISCV_VectorCall
Calling convention used for RISC-V V-extension.
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ Fast
Attempts to make calls as fast as possible (e.g.
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
@ GRAAL
Used by GraalVM. Two additional registers are reserved.
@ C
The default llvm calling convention, compatible with C.
bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
@ VECREDUCE_SEQ_FADD
Generic reduction nodes.
@ MLOAD
Masked load and store - consecutive vector load and store operations with additional mask operand tha...
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
@ BSWAP
Byte Swap and Counting operators.
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
@ ADD
Simple integer binary arithmetic operators.
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
@ FADD
Simple binary floating point operators.
@ VECREDUCE_FMAXIMUM
FMINIMUM/FMAXIMUM nodes propagate NaNs and signed zeroes using the llvm.minimum and llvm....
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
@ INIT_TRAMPOLINE
INIT_TRAMPOLINE - This corresponds to the init_trampoline intrinsic.
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ SET_ROUNDING
Set rounding mode.
@ SIGN_EXTEND
Conversion operators.
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
@ READSTEADYCOUNTER
READSTEADYCOUNTER - This corresponds to the readsteadycounter intrinsic.
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ BR_CC
BR_CC - Conditional branch.
@ BR_JT
BR_JT - Jumptable branch.
@ VECTOR_INTERLEAVE
VECTOR_INTERLEAVE(VEC1, VEC2) - Returns two vectors with all input and output vectors having the same...
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
@ UNDEF
UNDEF - An undefined node.
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
@ GET_ROUNDING
Returns the current rounding mode: -1 Undefined, 0 Round to 0, 1 Round to nearest (ties to even), 2 Round to ...
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
@ SHL
Shift and rotation operations.
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
@ VECTOR_REVERSE
VECTOR_REVERSE(VECTOR) - Returns a vector, of the same type as VECTOR, whose elements are shuffled us...
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
@ MGATHER
Masked gather and scatter - load and store operations for a vector of random addresses with additiona...
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ TRAP
TRAP - Trapping instruction.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
@ STRICT_FADD
Constrained versions of the binary floating point operators.
@ SPLAT_VECTOR_PARTS
SPLAT_VECTOR_PARTS(SCALAR1, SCALAR2, ...) - Returns a vector with the scalar values joined together a...
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
@ VECTOR_SPLICE
VECTOR_SPLICE(VEC1, VEC2, IMM) - Returns a subvector of the same type as VEC1/VEC2 from CONCAT_VECTOR...
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN,...
@ FFREXP
FFREXP - frexp, extract fractional and exponent component of a floating-point value.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ VECTOR_COMPRESS
VECTOR_COMPRESS(Vec, Mask, Passthru) consecutively places vector elements based on mask e....
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
@ BRCOND
BRCOND - Conditional branch.
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
@ VECTOR_DEINTERLEAVE
VECTOR_DEINTERLEAVE(VEC1, VEC2) - Returns two vectors with all input and output vectors having the sa...
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that behave the same as FMINNUM_IEEE and FMAXNUM_IEEE besid...
@ TRUNCATE_SSAT_S
TRUNCATE_[SU]SAT_[SU] - Truncate for saturated operand [SU] located in middle, prefix for SAT means i...
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
@ ADJUST_TRAMPOLINE
ADJUST_TRAMPOLINE - This corresponds to the adjust_trampoline intrinsic.
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
std::optional< unsigned > getVPMaskIdx(unsigned Opcode)
The operand position of the vector mask.
std::optional< unsigned > getVPExplicitVectorLengthIdx(unsigned Opcode)
The operand position of the explicit vector length parameter.
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isVPOpcode(unsigned Opcode)
Whether this is a vector-predicated Opcode.
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
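A hedged sketch of the condition-code helpers listed above; for an i32 comparison, inverting SETLT yields SETGE and swapping its operands yields SETGT:
// Hedged sketch of ISD condition-code algebra.
ISD::CondCode CC     = ISD::SETLT;
ISD::CondCode InvCC  = ISD::getSetCCInverse(CC, MVT::i32);   // ISD::SETGE
ISD::CondCode SwapCC = ISD::getSetCCSwappedOperands(CC);     // ISD::SETGT
bool IsEquality      = ISD::isIntEqualitySetCC(CC);          // false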
Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
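A hedged sketch of getOrInsertDeclaration for a non-overloaded intrinsic (M is an assumed llvm::Module pointer):
// Hedged sketch: llvm.trap takes no overloaded types, so Tys stays empty.
llvm::Function *TrapDecl =
    llvm::Intrinsic::getOrInsertDeclaration(M, llvm::Intrinsic::trap);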
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ TAIL_UNDISTURBED_MASK_UNDISTURBED
static VLMUL getLMul(uint64_t TSFlags)
static int getFRMOpNum(const MCInstrDesc &Desc)
static unsigned getSEWOpNum(const MCInstrDesc &Desc)
@ SplitF64
Turns an f64 into a pair of i32s.
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #3 and #4) ...
@ STRICT_VFCVT_RTZ_XU_F_VL
@ TRUNCATE_VECTOR_VL_USAT
@ BuildPairF64
Turns a pair of i32s into an f64.
@ BuildGPRPair
Turn a pair of i<xlen>s into an even-odd register pair (untyped).
@ STRICT_VFROUND_NOEXCEPT_VL
@ SPLAT_VECTOR_SPLIT_I64_VL
@ SplitGPRPair
Turn an even-odd register pair (untyped) into a pair of i<xlen>s.
@ TRUNCATE_VECTOR_VL_SSAT
@ STRICT_VFCVT_RTZ_X_F_VL
int getLoadFPImm(APFloat FPImm)
getLoadFPImm - Return a 5-bit binary encoding of the floating-point immediate value.
InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI)
int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, bool CompressionCost, bool FreeZeroes)
InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI, unsigned &ShiftAmt, unsigned &AddOpc)
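A hedged sketch of the constant-materialization helpers above, assuming an MCSubtargetInfo &STI is available from the surrounding lowering code:
// Hedged sketch: expand a constant into an instruction sequence and query its
// materialization cost (an 8-byte XLen is assumed for the cost query).
RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(0x12345678, STI);
int Cost = RISCVMatInt::getIntMatCost(APInt(64, 0x12345678), /*Size=*/8, STI,
                                      /*CompressionCost=*/false,
                                      /*FreeZeroes=*/false);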
static unsigned decodeVSEW(unsigned VSEW)
std::pair< unsigned, bool > decodeVLMUL(RISCVII::VLMUL VLMUL)
static RISCVII::VLMUL encodeLMUL(unsigned LMUL, bool Fractional)
static unsigned encodeSEW(unsigned SEW)
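A hedged sketch of the vtype encode/decode round trips listed above (the enclosing RISCVVType namespace reflects how these helpers are grouped in the RISC-V MC layer):
// Hedged sketch: SEW=32 encodes to 2; LMUL=2 (non-fractional) round-trips.
unsigned VSEW = RISCVVType::encodeSEW(32);                    // 2
unsigned SEW  = RISCVVType::decodeVSEW(VSEW);                 // 32
RISCVII::VLMUL VLMul = RISCVVType::encodeLMUL(2, /*Fractional=*/false);
auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMul);     // {2, false}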
static constexpr unsigned FPMASK_Negative_Zero
static constexpr unsigned FPMASK_Positive_Subnormal
static constexpr unsigned FPMASK_Positive_Normal
static constexpr unsigned FPMASK_Negative_Subnormal
static constexpr unsigned FPMASK_Negative_Normal
static constexpr unsigned FPMASK_Positive_Infinity
int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIndex)
static constexpr unsigned FPMASK_Negative_Infinity
static constexpr unsigned FPMASK_Quiet_NaN
ArrayRef< MCPhysReg > getArgGPRs(const RISCVABI::ABI ABI)
static constexpr unsigned FPMASK_Signaling_NaN
static constexpr unsigned FPMASK_Positive_Zero
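A hedged sketch of how the fclass-style bitmasks above compose; the RISCV:: namespace placement is an assumption:
// Hedged sketch: fclass categories combine by bitwise OR.
unsigned AnyNaN = RISCV::FPMASK_Quiet_NaN | RISCV::FPMASK_Signaling_NaN;
unsigned AnyInf =
    RISCV::FPMASK_Negative_Infinity | RISCV::FPMASK_Positive_Infinity;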
static constexpr unsigned RVVBitsPerBlock
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
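A hedged sketch of the libcall-selection helpers above:
// Hedged sketch: pick the runtime-library call for an f64 -> i64 conversion.
RTLIB::Libcall LC = RTLIB::getFPTOSINT(MVT::f64, MVT::i64);
bool HasCall = (LC != RTLIB::UNKNOWN_LIBCALL);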
@ Kill
The last use of a register.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
@ System
Synchronized with respect to all concurrently executing threads.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
uint32_t read32le(const void *P)
This is an optimization pass for GlobalISel generic memory operations.
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
static const MachineMemOperand::Flags MONontemporalBit1
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
MCCodeEmitter * createRISCVMCCodeEmitter(const MCInstrInfo &MCII, MCContext &Ctx)
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
static const MachineMemOperand::Flags MONontemporalBit0
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value that is congruent to Skew modulo Align.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
bool isReleaseOrStronger(AtomicOrdering AO)
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
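A hedged sketch of the bit-math helpers listed above, with illustrative values:
// Hedged sketch: power-of-two, log2, and trailing-zero utilities.
bool P2    = llvm::isPowerOf2_64(64);        // true
unsigned L = llvm::Log2_64(64);              // 6
uint64_t C = llvm::PowerOf2Ceil(100);        // 128
int TZ     = llvm::countr_zero(40u);         // 3 (40 = 0b101000)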
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy)
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
unsigned getKillRegState(bool B)
DWARFExpression::Operation Op
RoundingMode
Rounding mode.
@ TowardZero
roundTowardZero.
@ NearestTiesToEven
roundTiesToEven.
@ TowardPositive
roundTowardPositive.
@ NearestTiesToAway
roundTiesToAway.
@ TowardNegative
roundTowardNegative.
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr unsigned BitWidth
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
bool RISCVCCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy)
RISCVCCAssignFn - This target-specific function extends the default CCValAssign with additional infor...
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
void processShuffleMasks(ArrayRef< int > Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs, unsigned NumOfUsedRegs, function_ref< void()> NoInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> SingleInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned, bool)> ManyInputsAction)
Splits and processes shuffle mask depending on the number of input and output registers.
unsigned Log2(Align A)
Returns the log2 of the alignment.
bool CC_RISCV(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy)
llvm::SmallVector< int, 16 > createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
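A hedged sketch combining the shuffle-mask utilities listed above (createSequentialMask, narrowShuffleMaskElts, widenShuffleMaskElts):
// Hedged sketch: build the mask 0..7, split each lane in two, then widen back.
llvm::SmallVector<int, 16> Mask =
    llvm::createSequentialMask(/*Start=*/0, /*NumInts=*/8, /*NumUndefs=*/0);
llvm::SmallVector<int, 16> Narrow;
llvm::narrowShuffleMaskElts(/*Scale=*/2, Mask, Narrow);               // 0..15
llvm::SmallVector<int, 16> Wide;
bool Widened = llvm::widenShuffleMaskElts(/*Scale=*/2, Narrow, Wide); // 0..7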
bool isNeutralConstant(unsigned Opc, SDNodeFlags Flags, SDValue V, unsigned OperandNo)
Returns true if V is a neutral element of Opc with Flags.
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
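A hedged sketch using the SDValue constant predicates listed above; the helper name is illustrative and V is assumed to come from surrounding combine code:
// Hedged sketch: classify an operand as all-zeros or all-ones, including
// constant splat vectors.
static bool isZeroOrAllOnes(SDValue V) {
  if (isNullConstant(V) || isAllOnesConstant(V))
    return true;
  if (ConstantSDNode *C = isConstOrConstSplat(V, /*AllowUndefs=*/true))
    return C->isZero() || C->isAllOnes();
  return false;
}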
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
static constexpr roundingMode rmNearestTiesToEven
static unsigned int semanticsPrecision(const fltSemantics &)
This struct is a compact representation of a valid (non-zero power of two) alignment.
uint64_t value() const
This is a hole in the type system and should not be abused.
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
uint64_t getScalarStoreSize() const
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
ElementCount getVectorElementCount() const
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
bool isByteSized() const
Return true if the bit size is a multiple of 8.
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
unsigned getRISCVVectorTupleNumFields() const
Given a RISCV vector tuple type, return the num_fields.
uint64_t getScalarSizeInBits() const
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
bool isRISCVVectorTuple() const
Return true if this is a RISCV vector tuple value type.
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
bool isFixedLengthVector() const
EVT getRoundIntegerType(LLVMContext &Context) const
Rounds the bit-width of the given integer EVT up to the nearest power of two (and at least to eight),...
bool isVector() const
Return true if this is a vector value type.
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
bool bitsGE(EVT VT) const
Return true if this has no less bits than VT.
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
bool isInteger() const
Return true if this is an integer or a vector integer type.
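A hedged sketch of the EVT queries above (Ctx is an assumed LLVMContext):
// Hedged sketch: fixed-width vector type algebra with EVT.
EVT VT     = EVT::getVectorVT(Ctx, MVT::i8, 16);        // v16i8
EVT EltVT  = VT.getVectorElementType();                 // i8
EVT WideVT = VT.changeVectorElementType(MVT::i32);      // v16i32
TypeSize Bits = VT.getSizeInBits();                     // 128 fixed bits
bool Scalable = VT.isScalableVector();                  // false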
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
static KnownBits urem(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for urem(LHS, RHS).
bool isUnknown() const
Returns true if we don't know any bits.
unsigned countMaxTrailingZeros() const
Returns the maximum number of trailing zero bits possible.
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
unsigned getBitWidth() const
Get the bit width of this value.
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
void resetAll()
Resets the known state of all bits.
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
static KnownBits udiv(const KnownBits &LHS, const KnownBits &RHS, bool Exact=false)
Compute known bits for udiv(LHS, RHS).
unsigned countMaxLeadingZeros() const
Returns the maximum number of leading zero bits possible.
static KnownBits shl(const KnownBits &LHS, const KnownBits &RHS, bool NUW=false, bool NSW=false, bool ShAmtNonZero=false)
Compute known bits for shl(LHS, RHS).
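A hedged sketch of the KnownBits queries above, with illustrative values:
// Hedged sketch: an 8-bit value whose top four bits are known to be zero.
KnownBits Known(8);                               // nothing known yet
bool Unknown = Known.isUnknown();                 // true
Known.Zero.setHighBits(4);                        // bits 4..7 known zero
unsigned MaxActive = Known.countMaxActiveBits();  // 4
KnownBits Wide = Known.zext(16);                  // new high bits known zero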
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
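A hedged sketch of the MachinePointerInfo constructors above (MF is an assumed MachineFunction reference and FI an assumed frame index):
// Hedged sketch: describe a fixed-stack slot and an 8-byte offset into it.
MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
MachinePointerInfo OffInfo = PtrInfo.getWithOffset(8);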
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
BitVector getReservedRegs(const MachineFunction &MF) const override
Register getFrameRegister(const MachineFunction &MF) const override
These are IR-level optimization flags that may be propagated to SDNodes.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
const ConstantInt * CFIType
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
bool isAfterLegalizeDAG() const
void AddToWorklist(SDNode *N)
bool isCalledByLegalizer() const
bool recursivelyDeleteUnusedNodes(SDNode *N)
bool isBeforeLegalize() const
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT, bool Value=true)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
bool CombineTo(SDValue O, SDValue N)