1//===- ModuloSchedule.cpp - Software pipeline schedule expansion ----------===// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7//===----------------------------------------------------------------------===// 22#define DEBUG_TYPE "pipeliner" 27cl::desc(
"Swap target blocks of a conditional branch for MVE expander"));
34//===----------------------------------------------------------------------===// 35// ModuloScheduleExpander implementation 36//===----------------------------------------------------------------------===// 38/// Return the register values for the operands of a Phi instruction. 39/// This function assume the instruction is a Phi. 41unsigned &InitVal,
unsigned &LoopVal) {
42assert(Phi.isPHI() &&
"Expecting a Phi.");
46for (
unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
47if (Phi.getOperand(i + 1).getMBB() !=
Loop)
48 InitVal = Phi.getOperand(i).getReg();
50 LoopVal = Phi.getOperand(i).getReg();
52assert(InitVal != 0 && LoopVal != 0 &&
"Unexpected Phi structure.");
55/// Return the Phi register value that comes from the incoming block. 57for (
unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
58if (Phi.getOperand(i + 1).getMBB() != LoopBB)
59return Phi.getOperand(i).getReg();
63/// Return the Phi register value that comes the loop block. 65for (
unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
66if (Phi.getOperand(i + 1).getMBB() == LoopBB)
67return Phi.getOperand(i).getReg();
77// Iterate over the definitions in each instruction, and compute the 78// stage difference for each use. Keep the maximum value. 84bool PhiIsSwapped =
false;
89if (UseStage != -1 && UseStage >= DefStage)
90 Diff = UseStage - DefStage;
92if (isLoopCarried(*
MI))
97 MaxDiff = std::max(Diff, MaxDiff);
99 RegToStageDiff[Reg] = std::make_pair(MaxDiff, PhiIsSwapped);
103 generatePipelinedLoop();
106void ModuloScheduleExpander::generatePipelinedLoop() {
110// Create a new basic block for the kernel and add it to the CFG. 115// Remember the registers that are used in different stages. The index is 116// the iteration, or stage, that the instruction is scheduled in. This is 117// a map between register names in the original block and the names created 118// in each stage of the pipelined loop. 119 ValueMapTy *VRMap =
new ValueMapTy[(MaxStageCount + 1) * 2];
121// The renaming destination by Phis for the registers across stages. 122// This map is updated during Phis generation to point to the most recent 123// renaming destination. 124 ValueMapTy *VRMapPhi =
new ValueMapTy[(MaxStageCount + 1) * 2];
130// Generate the prolog instructions that set up the pipeline. 131 generateProlog(MaxStageCount, KernelBB, VRMap, PrologBBs);
135// Rearrange the instructions to generate the new, pipelined loop, 136// and update register names as needed. 140unsigned StageNum = Schedule.
getStage(CI);
141MachineInstr *NewMI = cloneInstr(CI, MaxStageCount, StageNum);
142 updateInstruction(NewMI,
false, MaxStageCount, StageNum, VRMap);
144 InstrMap[NewMI] = CI;
147// Copy any terminator instructions to the new kernel, and update 151 updateInstruction(NewMI,
false, MaxStageCount, 0, VRMap);
153 InstrMap[NewMI] = &
MI;
156 NewKernel = KernelBB;
160 generateExistingPhis(KernelBB, PrologBBs.
back(), KernelBB, KernelBB, VRMap,
161 InstrMap, MaxStageCount, MaxStageCount,
false);
162 generatePhis(KernelBB, PrologBBs.
back(), KernelBB, KernelBB, VRMap, VRMapPhi,
163 InstrMap, MaxStageCount, MaxStageCount,
false);
168// Generate the epilog instructions to complete the pipeline. 169 generateEpilog(MaxStageCount, KernelBB, BB, VRMap, VRMapPhi, EpilogBBs,
172// We need this step because the register allocation doesn't handle some 173// situations well, so we insert copies to help out. 174 splitLifetimes(KernelBB, EpilogBBs);
176// Remove dead instructions due to loop induction variables. 177 removeDeadInstructions(KernelBB, EpilogBBs);
179// Add branches between prolog and epilog blocks. 180 addBranches(*Preheader, PrologBBs, KernelBB, EpilogBBs, VRMap);
187// Remove the original loop since it's no longer referenced. 191 BB->eraseFromParent();
194/// Generate the pipeline prolog code. 195void ModuloScheduleExpander::generateProlog(
unsigned LastStage,
198 MBBVectorTy &PrologBBs) {
202// Generate a basic block for each stage, not including the last stage, 203// which will be generated in the kernel. Each basic block may contain 204// instructions from multiple stages/iterations. 205for (
unsigned i = 0; i < LastStage; ++i) {
206// Create and insert the prolog basic block prior to the original loop 207// basic block. The original loop is removed later. 216// Generate instructions for each appropriate stage. Process instructions 217// in original program order. 218for (
int StageNum = i; StageNum >= 0; --StageNum) {
222if (Schedule.
getStage(&*BBI) == StageNum) {
226 cloneAndChangeInstr(&*BBI, i, (
unsigned)StageNum);
227 updateInstruction(NewMI,
false, i, (
unsigned)StageNum, VRMap);
229 InstrMap[NewMI] = &*BBI;
233 rewritePhiValues(NewBB, i, VRMap, InstrMap);
242// Check if we need to remove the branch from the preheader to the original 243// loop, and replace it with a branch to the new loop. 251/// Generate the pipeline epilog code. The epilog code finishes the iterations 252/// that were started in either the prolog or the kernel. We create a basic 253/// block for each stage that needs to complete. 254void ModuloScheduleExpander::generateEpilog(
256 ValueMapTy *VRMap, ValueMapTy *VRMapPhi, MBBVectorTy &EpilogBBs,
257 MBBVectorTy &PrologBBs) {
258// We need to change the branch from the kernel to the first epilog block, so 259// this call to analyze branch uses the kernel rather than the original BB. 263assert(!checkBranch &&
"generateEpilog must be able to analyze the branch");
268if (*LoopExitI == KernelBB)
270assert(LoopExitI != KernelBB->
succ_end() &&
"Expecting a successor");
277// Generate a basic block for each stage, not including the last stage, 278// which was generated for the kernel. Each basic block may contain 279// instructions from multiple stages/iterations. 280int EpilogStage = LastStage + 1;
281for (
unsigned i = LastStage; i >= 1; --i, ++EpilogStage) {
290if (EpilogStart == LoopExitBB)
293// Add instructions to the epilog depending on the current block. 294// Process instructions in original program order. 295for (
unsigned StageNum = i; StageNum <= LastStage; ++StageNum) {
296for (
auto &BBI : *BB) {
300if ((
unsigned)Schedule.
getStage(In) == StageNum) {
301// Instructions with memoperands in the epilog are updated with 302// conservative values. 304 updateInstruction(NewMI, i == 1, EpilogStage, 0, VRMap);
310 generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap,
311 InstrMap, LastStage, EpilogStage, i == 1);
312 generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap, VRMapPhi,
313 InstrMap, LastStage, EpilogStage, i == 1);
322// Fix any Phi nodes in the loop exit block. 325// Create a branch to the new epilog from the kernel. 326// Remove the original branch and add a new branch to the epilog. 329"Unable to determine looping branch direction");
334// Add a branch to the loop exit. 335if (EpilogBBs.size() > 0) {
342/// Replace all uses of FromReg that appear outside the specified 343/// basic block with ToReg. 350if (O.getParent()->getParent() !=
MBB)
356/// Return true if the register has a use that occurs outside the 361if (MO.getParent()->getParent() != BB)
366/// Generate Phis for the specific block in the generated pipelined code. 367/// This function looks at the Phis from the original code to guide the 368/// creation of new Phis. 369void ModuloScheduleExpander::generateExistingPhis(
372unsigned LastStageNum,
unsigned CurStageNum,
bool IsLast) {
373// Compute the stage number for the initial value of the Phi, which 374// comes from the prolog. The prolog to use depends on to which kernel/ 375// epilog that we're adding the Phi. 376unsigned PrologStage = 0;
377unsigned PrevStage = 0;
378bool InKernel = (LastStageNum == CurStageNum);
380 PrologStage = LastStageNum - 1;
381 PrevStage = CurStageNum;
383 PrologStage = LastStageNum - (CurStageNum - LastStageNum);
384 PrevStage = LastStageNum + (CurStageNum - LastStageNum) - 1;
388 BBE = BB->getFirstNonPHI();
397// The Phi value from the loop body typically is defined in the loop, but 398// not always. So, we need to check if the value is defined in the loop. 399unsigned PhiOp2 = LoopVal;
400if (
auto It = VRMap[LastStageNum].
find(LoopVal);
401 It != VRMap[LastStageNum].end())
404int StageScheduled = Schedule.
getStage(&*BBI);
406unsigned NumStages = getStagesForReg(Def, CurStageNum);
408// We don't need to generate a Phi anymore, but we need to rename any uses 410unsigned NewReg = VRMap[PrevStage][LoopVal];
411 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, 0, &*BBI, Def,
413if (VRMap[CurStageNum].
count(LoopVal))
414 VRMap[CurStageNum][
Def] = VRMap[CurStageNum][LoopVal];
416// Adjust the number of Phis needed depending on the number of prologs left, 417// and the distance from where the Phi is first scheduled. The number of 418// Phis cannot exceed the number of prolog stages. Each stage can 419// potentially define two values. 420unsigned MaxPhis = PrologStage + 2;
421if (!InKernel && (
int)PrologStage <= LoopValStage)
422 MaxPhis = std::max((
int)MaxPhis - (
int)LoopValStage, 1);
423unsigned NumPhis = std::min(NumStages, MaxPhis);
426unsigned AccessStage = (LoopValStage != -1) ? LoopValStage : StageScheduled;
427// In the epilog, we may need to look back one stage to get the correct 428// Phi name, because the epilog and prolog blocks execute the same stage. 429// The correct name is from the previous block only when the Phi has 430// been completely scheduled prior to the epilog, and Phi value is not 431// needed in multiple stages. 433if (!InKernel && StageScheduled >= LoopValStage && AccessStage == 0 &&
436// Adjust the computations below when the phi and the loop definition 437// are scheduled in different stages. 438if (InKernel && LoopValStage != -1 && StageScheduled > LoopValStage)
439 StageDiff = StageScheduled - LoopValStage;
440for (
unsigned np = 0; np < NumPhis; ++np) {
441// If the Phi hasn't been scheduled, then use the initial Phi operand 442// value. Otherwise, use the scheduled version of the instruction. This 443// is a little complicated when a Phi references another Phi. 444if (np > PrologStage || StageScheduled >= (
int)LastStageNum)
446// Check if the Phi has already been scheduled in a prolog stage. 447elseif (PrologStage >= AccessStage + StageDiff + np &&
448 VRMap[PrologStage - StageDiff - np].
count(LoopVal) != 0)
449 PhiOp1 = VRMap[PrologStage - StageDiff - np][LoopVal];
450// Check if the Phi has already been scheduled, but the loop instruction 451// is either another Phi, or doesn't occur in the loop. 452elseif (PrologStage >= AccessStage + StageDiff + np) {
453// If the Phi references another Phi, we need to examine the other 454// Phi to get the correct value. 458while (InstOp1 && InstOp1->
isPHI() && InstOp1->
getParent() == BB) {
459int PhiStage = Schedule.
getStage(InstOp1);
460if ((
int)(PrologStage - StageDiff - np) < PhiStage + Indirects)
465int PhiOpStage = Schedule.
getStage(InstOp1);
466int StageAdj = (PhiOpStage != -1 ? PhiStage - PhiOpStage : 0);
467if (PhiOpStage != -1 && PrologStage - StageAdj >= Indirects + np &&
468 VRMap[PrologStage - StageAdj - Indirects - np].
count(PhiOp1)) {
469 PhiOp1 = VRMap[PrologStage - StageAdj - Indirects - np][PhiOp1];
476// If this references a generated Phi in the kernel, get the Phi operand 477// from the incoming block. 483bool LoopDefIsPhi = PhiInst && PhiInst->
isPHI();
484// In the epilog, a map lookup is needed to get the value from the kernel, 485// or previous epilog block. How is does this depends on if the 486// instruction is scheduled in the previous block. 489if (LoopValStage != -1 && StageScheduled > LoopValStage)
490 StageDiffAdj = StageScheduled - LoopValStage;
491// Use the loop value defined in the kernel, unless the kernel 492// contains the last definition of the Phi. 493if (np == 0 && PrevStage == LastStageNum &&
494 (StageScheduled != 0 || LoopValStage != 0) &&
495 VRMap[PrevStage - StageDiffAdj].count(LoopVal))
496 PhiOp2 = VRMap[PrevStage - StageDiffAdj][LoopVal];
497// Use the value defined by the Phi. We add one because we switch 498// from looking at the loop value to the Phi definition. 499elseif (np > 0 && PrevStage == LastStageNum &&
500 VRMap[PrevStage - np + 1].
count(Def))
501 PhiOp2 = VRMap[PrevStage - np + 1][Def];
502// Use the loop value defined in the kernel. 503elseif (
static_cast<unsigned>(LoopValStage) > PrologStage + 1 &&
504 VRMap[PrevStage - StageDiffAdj - np].count(LoopVal))
505 PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal];
506// Use the value defined by the Phi, unless we're generating the first 507// epilog and the Phi refers to a Phi in a different stage. 508elseif (VRMap[PrevStage - np].
count(Def) &&
509 (!LoopDefIsPhi || (PrevStage != LastStageNum) ||
510 (LoopValStage == StageScheduled)))
511 PhiOp2 = VRMap[PrevStage - np][
Def];
514// Check if we can reuse an existing Phi. This occurs when a Phi 515// references another Phi, and the other Phi is scheduled in an 516// earlier stage. We can try to reuse an existing Phi up until the last 517// stage of the current Phi. 519if (
static_cast<int>(PrologStage - np) >= StageScheduled) {
520int LVNumStages = getStagesForPhi(LoopVal);
521int StageDiff = (StageScheduled - LoopValStage);
522 LVNumStages -= StageDiff;
523// Make sure the loop value Phi has been processed already. 524if (LVNumStages > (
int)np && VRMap[CurStageNum].count(LoopVal)) {
526unsigned ReuseStage = CurStageNum;
527if (isLoopCarried(*PhiInst))
528 ReuseStage -= LVNumStages;
529// Check if the Phi to reuse has been generated yet. If not, then 530// there is nothing to reuse. 531if (VRMap[ReuseStage - np].
count(LoopVal)) {
532 NewReg = VRMap[ReuseStage - np][LoopVal];
534 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI,
536// Update the map with the new Phi name. 537 VRMap[CurStageNum - np][
Def] = NewReg;
539if (VRMap[LastStageNum - np - 1].
count(LoopVal))
540 PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];
542if (IsLast && np == NumPhis - 1)
548if (InKernel && StageDiff > 0 &&
549 VRMap[CurStageNum - StageDiff - np].
count(LoopVal))
550 PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal];
558 TII->
get(TargetOpcode::PHI), NewReg);
562 InstrMap[NewPhi] = &*BBI;
564// We define the Phis after creating the new pipelined code, so 565// we need to rename the Phi values in scheduled instructions. 568if (InKernel && VRMap[PrevStage - np].
count(LoopVal))
569 PrevReg = VRMap[PrevStage - np][LoopVal];
570 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
572// If the Phi has been scheduled, use the new name for rewriting. 573if (VRMap[CurStageNum - np].
count(Def)) {
574unsignedR = VRMap[CurStageNum - np][
Def];
575 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, R,
579// Check if we need to rename any uses that occurs after the loop. The 580// register to replace depends on whether the Phi is scheduled in the 582if (IsLast && np == NumPhis - 1)
585// In the kernel, a dependent Phi uses the value from this Phi. 589// Update the map with the new Phi name. 590 VRMap[CurStageNum - np][
Def] = NewReg;
593while (NumPhis++ < NumStages) {
594 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, NumPhis, &*BBI, Def,
598// Check if we need to rename a Phi that has been eliminated due to 600if (NumStages == 0 && IsLast && VRMap[CurStageNum].
count(LoopVal))
605/// Generate Phis for the specified block in the generated pipelined code. 606/// These are new Phis needed because the definition is scheduled after the 607/// use in the pipelined sequence. 608void ModuloScheduleExpander::generatePhis(
611 InstrMapTy &InstrMap,
unsigned LastStageNum,
unsigned CurStageNum,
613// Compute the stage number that contains the initial Phi value, and 614// the Phi from the previous stage. 615unsigned PrologStage = 0;
616unsigned PrevStage = 0;
617unsigned StageDiff = CurStageNum - LastStageNum;
618bool InKernel = (StageDiff == 0);
620 PrologStage = LastStageNum - 1;
621 PrevStage = CurStageNum;
623 PrologStage = LastStageNum - StageDiff;
624 PrevStage = LastStageNum + StageDiff - 1;
628 BBE = BB->instr_end();
630for (
unsigned i = 0, e = BBI->getNumOperands(); i !=
e; ++i) {
635int StageScheduled = Schedule.
getStage(&*BBI);
636assert(StageScheduled != -1 &&
"Expecting scheduled instruction.");
638unsigned NumPhis = getStagesForReg(Def, CurStageNum);
639// An instruction scheduled in stage 0 and is used after the loop 640// requires a phi in the epilog for the last definition from either 641// the kernel or prolog. 642if (!InKernel && NumPhis == 0 && StageScheduled == 0 &&
645if (!InKernel && (
unsigned)StageScheduled > PrologStage)
650 PhiOp2 = VRMap[PrevStage][
Def];
652if (InstOp2->isPHI() && InstOp2->getParent() == NewBB)
655// The number of Phis can't exceed the number of prolog stages. The 656// prolog stage number is zero based. 657if (NumPhis > PrologStage + 1 - StageScheduled)
658 NumPhis = PrologStage + 1 - StageScheduled;
659for (
unsigned np = 0; np < NumPhis; ++np) {
662// %Org = ... (Scheduled at Stage#0, NumPhi = 2) 669// %Phi0 = Phi %Clone1, Prolog1, %Clone2, Kernel 670// %Phi1 = Phi %Clone0, Prolog1, %Phi0, Kernel 673// %Phi2 = Phi %Clone1, Prolog1, %Clone2, Kernel 674// %Phi3 = Phi %Clone0, Prolog1, %Phi0, Kernel 676// %Phi4 = Phi %Clone0, Prolog0, %Phi2, Epilog0 678// VRMap = {0: %Clone0, 1: %Clone1, 2: %Clone2} 679// VRMapPhi (after Kernel) = {0: %Phi1, 1: %Phi0} 680// VRMapPhi (after Epilog0) = {0: %Phi3, 1: %Phi2} 682unsigned PhiOp1 = VRMap[PrologStage][
Def];
683if (np <= PrologStage)
684 PhiOp1 = VRMap[PrologStage - np][
Def];
686if (PrevStage == LastStageNum && np == 0)
687 PhiOp2 = VRMap[LastStageNum][
Def];
689 PhiOp2 = VRMapPhi[PrevStage - np][
Def];
697 TII->
get(TargetOpcode::PHI), NewReg);
701 InstrMap[NewPhi] = &*BBI;
703// Rewrite uses and update the map. The actions depend upon whether 704// we generating code for the kernel or epilog blocks. 706 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp1,
708 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp2,
712 VRMapPhi[PrevStage - np - 1][
Def] = NewReg;
714 VRMapPhi[CurStageNum - np][
Def] = NewReg;
715if (np == NumPhis - 1)
716 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
719if (IsLast && np == NumPhis - 1)
726/// Remove instructions that generate values with no uses. 727/// Typically, these are induction variable operations that generate values 728/// used in the loop itself. A dead instruction has a definition with 729/// no uses, or uses that occur in the original loop only. 731 MBBVectorTy &EpilogBBs) {
732// For each epilog block, check that the value defined by each instruction 733// is used. If not, delete it. 738// From DeadMachineInstructionElem. Don't delete inline assembly. 739if (
MI->isInlineAsm()) {
744// Check if it's safe to remove the instruction due to side effects. 745// We can, and want to, remove Phis here. 746if (!
MI->isSafeToMove(SawStore) && !
MI->isPHI()) {
753// Assume physical registers are used, unless they are marked dead. 760unsigned realUses = 0;
762// Check if there are any uses that occur only in the original 763// loop. If so, that's not a real use. 764if (
U.getParent()->getParent() != BB) {
776MI++->eraseFromParent();
781// In the kernel block, check if we can remove a Phi that generates a value 782// used in an instruction removed in the epilog block. 792/// For loop carried definitions, we split the lifetime of a virtual register 793/// that has uses past the definition in the next iteration. A copy with a new 794/// virtual register is inserted before the definition, which helps with 795/// generating a better register assignment. 797/// v1 = phi(a, v2) v1 = phi(a, v2) 798/// v2 = phi(b, v3) v2 = phi(b, v3) 799/// v3 = .. v4 = copy v1 803 MBBVectorTy &EpilogBBs) {
805for (
auto &
PHI : KernelBB->
phis()) {
807// Check for any Phi definition that used as an operand of another Phi 812if (
I->isPHI() &&
I->getParent() == KernelBB) {
813// Get the loop carried definition. 818if (!
MI ||
MI->getParent() != KernelBB ||
MI->isPHI())
820// Search through the rest of the block looking for uses of the Phi 821// definition. If one occurs, then split the lifetime. 822unsigned SplitReg = 0;
825if (BBJ.readsRegister(Def,
/*TRI=*/nullptr)) {
826// We split the lifetime when we find the first use. 830 TII->
get(TargetOpcode::COPY), SplitReg)
833 BBJ.substituteRegister(Def, SplitReg, 0, *
TRI);
837// Search through each of the epilog blocks for any uses to be renamed. 838for (
auto &
Epilog : EpilogBBs)
840if (
I.readsRegister(Def,
/*TRI=*/nullptr))
841I.substituteRegister(Def, SplitReg, 0, *
TRI);
848/// Remove the incoming block from the Phis in a basic block. 853for (
unsigned i = 1, e =
MI.getNumOperands(); i != e; i += 2)
854if (
MI.getOperand(i + 1).getMBB() ==
Incoming) {
855MI.removeOperand(i + 1);
862/// Create branches from each prolog basic block to the appropriate epilog 863/// block. These edges are needed if the loop ends before reaching the 866 MBBVectorTy &PrologBBs,
868 MBBVectorTy &EpilogBBs,
870assert(PrologBBs.size() == EpilogBBs.size() &&
"Prolog/Epilog mismatch");
874// Start from the blocks connected to the kernel and work "out" 875// to the first prolog and the last epilog blocks. 877unsigned MaxIter = PrologBBs.
size() - 1;
878for (
unsigned i = 0, j = MaxIter; i <= MaxIter; ++i, --
j) {
879// Add branches to the prolog that go to the corresponding 880// epilog, and the fall-thru prolog/kernel block. 885 std::optional<bool> StaticallyGreater =
887unsigned numAdded = 0;
888if (!StaticallyGreater) {
891 }
elseif (*StaticallyGreater ==
false) {
893Prolog->removeSuccessor(LastPro);
897// Remove the blocks that are no longer referenced. 898if (LastPro != LastEpi) {
902if (LastPro == KernelBB) {
916I != E && numAdded > 0; ++
I, --numAdded)
917 updateInstruction(&*
I,
false, j, 0, VRMap);
921LoopInfo->setPreheader(PrologBBs[MaxIter]);
922LoopInfo->adjustTripCount(-(MaxIter + 1));
926/// Return true if we can compute the amount the instruction changes 927/// during each iteration. Set Delta to the amount of the change. 928bool ModuloScheduleExpander::computeDelta(
MachineInstr &
MI,
unsigned &Delta) {
932bool OffsetIsScalable;
936// FIXME: This algorithm assumes instructions have fixed-size offsets. 946// Check if there is a Phi. If so, get the definition in the loop. 948if (BaseDef && BaseDef->
isPHI()) {
950 BaseDef =
MRI.getVRegDef(BaseReg);
963/// Update the memory operand with a new offset when the pipeliner 964/// generates a new copy of the instruction that refers to a 965/// different memory location. 966void ModuloScheduleExpander::updateMemOperands(
MachineInstr &NewMI,
971// If the instruction has memory operands, then adjust the offset 972// when the instruction appears in different stages. 977// TODO: Figure out whether isAtomic is really necessary (see D57601). 978if (MMO->isVolatile() || MMO->isAtomic() ||
979 (MMO->isInvariant() && MMO->isDereferenceable()) ||
980 (!MMO->getValue())) {
985if (Num != UINT_MAX && computeDelta(OldMI, Delta)) {
986 int64_t AdjOffset = Delta * Num;
997/// Clone the instruction for the new pipelined loop and update the 998/// memory operands, if needed. 1000unsigned CurStageNum,
1001unsigned InstStageNum) {
1003 updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
1007/// Clone the instruction for the new pipelined loop. If needed, this 1008/// function updates the instruction using the values saved in the 1009/// InstrChanges structure. 1010MachineInstr *ModuloScheduleExpander::cloneAndChangeInstr(
1011MachineInstr *OldMI,
unsigned CurStageNum,
unsigned InstStageNum) {
1013auto It = InstrChanges.
find(OldMI);
1014if (It != InstrChanges.
end()) {
1015 std::pair<unsigned, int64_t> RegAndOffset = It->second;
1016unsigned BasePos, OffsetPos;
1020MachineInstr *LoopDef = findDefInLoop(RegAndOffset.first);
1021if (Schedule.
getStage(LoopDef) > (
signed)InstStageNum)
1022 NewOffset += RegAndOffset.second * (CurStageNum - InstStageNum);
1025 updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
1029/// Update the machine instruction with new virtual registers. This 1030/// function may change the definitions and/or uses. 1031void ModuloScheduleExpander::updateInstruction(
MachineInstr *NewMI,
1033unsigned CurStageNum,
1034unsigned InstrStageNum,
1035 ValueMapTy *VRMap) {
1041// Create a new virtual register for the definition. 1045 VRMap[CurStageNum][reg] = NewReg;
1048 }
elseif (MO.
isUse()) {
1050// Compute the stage that contains the last definition for instruction. 1051int DefStageNum = Schedule.
getStage(Def);
1052unsigned StageNum = CurStageNum;
1053if (DefStageNum != -1 && (
int)InstrStageNum > DefStageNum) {
1054// Compute the difference in stages between the defintion and the use. 1055unsigned StageDiff = (InstrStageNum - DefStageNum);
1056// Make an adjustment to get the last definition. 1057 StageNum -= StageDiff;
1059if (
auto It = VRMap[StageNum].
find(reg); It != VRMap[StageNum].end())
1065/// Return the instruction in the loop that defines the register. 1066/// If the definition is a Phi, then follow the Phi operand to 1067/// the instruction in the loop. 1068MachineInstr *ModuloScheduleExpander::findDefInLoop(
unsigned Reg) {
1071while (
Def->isPHI()) {
1072if (!Visited.
insert(Def).second)
1074for (
unsigned i = 1, e =
Def->getNumOperands(); i < e; i += 2)
1075if (
Def->getOperand(i + 1).getMBB() == BB) {
1076Def =
MRI.getVRegDef(
Def->getOperand(i).getReg());
1083/// Return the new name for the value from the previous stage. 1084unsigned ModuloScheduleExpander::getPrevMapVal(
1085unsigned StageNum,
unsigned PhiStage,
unsigned LoopVal,
unsigned LoopStage,
1087unsigned PrevVal = 0;
1088if (StageNum > PhiStage) {
1090if (PhiStage == LoopStage && VRMap[StageNum - 1].
count(LoopVal))
1091// The name is defined in the previous stage. 1092 PrevVal = VRMap[StageNum - 1][LoopVal];
1093elseif (VRMap[StageNum].
count(LoopVal))
1094// The previous name is defined in the current stage when the instruction 1096 PrevVal = VRMap[StageNum][LoopVal];
1098// The loop value hasn't yet been scheduled. 1100elseif (StageNum == PhiStage + 1)
1101// The loop value is another phi, which has not been scheduled. 1103elseif (StageNum > PhiStage + 1 && LoopInst->
getParent() == BB)
1104// The loop value is another phi, which has been scheduled. 1106 getPrevMapVal(StageNum - 1, PhiStage,
getLoopPhiReg(*LoopInst, BB),
1107 LoopStage, VRMap, BB);
1112/// Rewrite the Phi values in the specified block to use the mappings 1113/// from the initial operand. Once the Phi is scheduled, we switch 1114/// to using the loop value instead of the Phi value, so those names 1115/// do not need to be rewritten. 1119 InstrMapTy &InstrMap) {
1121unsigned InitVal = 0;
1122unsigned LoopVal = 0;
1128unsigned NumPhis = getStagesForPhi(PhiDef);
1129if (NumPhis > StageNum)
1131for (
unsigned np = 0; np <= NumPhis; ++np) {
1133 getPrevMapVal(StageNum - np, PhiStage, LoopVal, LoopStage, VRMap, BB);
1136 rewriteScheduledInstr(NewBB, InstrMap, StageNum - np, np, &
PHI, PhiDef,
1142/// Rewrite a previously scheduled instruction to use the register value 1143/// from the new instruction. Make sure the instruction occurs in the 1144/// basic block, and we don't change the uses in the new instruction. 1145void ModuloScheduleExpander::rewriteScheduledInstr(
1147unsigned PhiNum,
MachineInstr *Phi,
unsigned OldReg,
unsigned NewReg,
1150int StagePhi = Schedule.
getStage(Phi) + PhiNum;
1151// Rewrite uses that have been scheduled already to use the new 1165assert(OrigInstr != InstrMap.end() &&
"Instruction not scheduled.");
1167int StageSched = Schedule.
getStage(OrigMI);
1168int CycleSched = Schedule.
getCycle(OrigMI);
1169unsigned ReplaceReg = 0;
1170// This is the stage for the scheduled instruction. 1171if (StagePhi == StageSched &&
Phi->isPHI()) {
1172int CyclePhi = Schedule.
getCycle(Phi);
1173if (PrevReg && InProlog)
1174 ReplaceReg = PrevReg;
1175elseif (PrevReg && !isLoopCarried(*Phi) &&
1176 (CyclePhi <= CycleSched || OrigMI->isPHI()))
1177 ReplaceReg = PrevReg;
1179 ReplaceReg = NewReg;
1181// The scheduled instruction occurs before the scheduled Phi, and the 1182// Phi is not loop carried. 1183if (!InProlog && StagePhi + 1 == StageSched && !isLoopCarried(*Phi))
1184 ReplaceReg = NewReg;
1185if (StagePhi > StageSched &&
Phi->isPHI())
1186 ReplaceReg = NewReg;
1187if (!InProlog && !
Phi->isPHI() && StagePhi < StageSched)
1188 ReplaceReg = NewReg;
1191MRI.constrainRegClass(ReplaceReg,
MRI.getRegClass(OldReg));
1193 UseOp.setReg(ReplaceReg);
1195Register SplitReg =
MRI.createVirtualRegister(
MRI.getRegClass(OldReg));
1199 UseOp.setReg(SplitReg);
1205bool ModuloScheduleExpander::isLoopCarried(
MachineInstr &Phi) {
1208int DefCycle = Schedule.
getCycle(&Phi);
1209int DefStage = Schedule.
getStage(&Phi);
1211unsigned InitVal = 0;
1212unsigned LoopVal = 0;
1219return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
1222//===----------------------------------------------------------------------===// 1223// PeelingModuloScheduleExpander implementation 1224//===----------------------------------------------------------------------===// 1225// This is a reimplementation of ModuloScheduleExpander that works by creating 1226// a fully correct steady-state kernel and peeling off the prolog and epilogs. 1227//===----------------------------------------------------------------------===// 1230// Remove any dead phis in MBB. Dead phis either have only one block as input 1231// (in which case they are the identity) or have no uses. 1239if (
MRI.use_empty(
MI.getOperand(0).getReg())) {
1242MI.eraseFromParent();
1244 }
elseif (!KeepSingleSrcPhi &&
MI.getNumExplicitOperands() == 3) {
1246MRI.constrainRegClass(
MI.getOperand(1).getReg(),
1247MRI.getRegClass(
MI.getOperand(0).getReg()));
1248assert(ConstrainRegClass &&
1249"Expected a valid constrained register class!");
1250 (void)ConstrainRegClass;
1251MRI.replaceRegWith(
MI.getOperand(0).getReg(),
1252MI.getOperand(1).getReg());
1255MI.eraseFromParent();
1262/// Rewrites the kernel block in-place to adhere to the given schedule. 1263/// KernelRewriter holds all of the state required to perform the rewriting. 1264classKernelRewriter {
1272// Map from register class to canonical undef register for that class. 1274// Map from <LoopReg, InitReg> to phi register for all created phis. Note that 1275// this map is only used when InitReg is non-undef. 1277// Map from LoopReg to phi register where the InitReg is undef. 1280// Reg is used by MI. Return the new register MI should use to adhere to the 1281// schedule. Insert phis as necessary. 1283// Insert a phi that carries LoopReg from the loop body and InitReg otherwise. 1284// If InitReg is not given it is chosen arbitrarily. It will either be undef 1285// or will be chosen so as to share another phi. 1288// Create an undef register of the given register class. 1300 : S(S), BB(LoopBB), PreheaderBB(
L.getLoopPreheader()),
1301 ExitBB(
L.getExitBlock()),
MRI(BB->
getParent()->getRegInfo()),
1302TII(BB->
getParent()->getSubtarget().getInstrInfo()), LIS(LIS) {
1304if (PreheaderBB == BB)
1308void KernelRewriter::rewrite() {
1309// Rearrange the loop to be in schedule order. Note that the schedule may 1310// contain instructions that are not owned by the loop block (InstrChanges and 1311// friends), so we gracefully handle unowned instructions and delete any 1312// instructions that weren't in the schedule. 1319MI->removeFromParent();
1324assert(FirstMI &&
"Failed to find first MI in schedule");
1326// At this point all of the scheduled instructions are between FirstMI 1327// and the end of the block. Kill from the first non-phi to FirstMI. 1331 (
I++)->eraseFromParent();
1334// Now remap every instruction in the loop. 1336if (
MI.isPHI() ||
MI.isTerminator())
1345 EliminateDeadPhis(BB,
MRI, LIS);
1347// Ensure a phi exists for all instructions that are either referenced by 1348// an illegal phi or by an instruction outside the loop. This allows us to 1349// treat remaps of these values the same as "normal" values that come from 1350// loop-carried phis. 1351for (
autoMI = BB->getFirstNonPHI();
MI != BB->end(); ++
MI) {
1360if (
MI.getParent() != BB) {
1376// Non-phi producers are simple to remap. Insert as many phis as the 1377// difference between the consumer and producer stages. 1379// Producer was not inside the loop. Use the register as-is. 1381int ProducerStage = S.
getStage(Producer);
1382assert(ConsumerStage != -1 &&
1383"In-loop consumer should always be scheduled!");
1384assert(ConsumerStage >= ProducerStage);
1385unsigned StageDiff = ConsumerStage - ProducerStage;
1387for (
unsignedI = 0;
I < StageDiff; ++
I)
1392// First, dive through the phi chain to find the defaults for the generated 1397while (LoopProducer->isPHI() && LoopProducer->getParent() == BB) {
1400 LoopProducer =
MRI.getUniqueVRegDef(LoopReg);
1403int LoopProducerStage = S.
getStage(LoopProducer);
1405 std::optional<Register> IllegalPhiDefault;
1407if (LoopProducerStage == -1) {
1409 }
elseif (LoopProducerStage > ConsumerStage) {
1410// This schedule is only representable if ProducerStage == ConsumerStage+1. 1411// In addition, Consumer's cycle must be scheduled after Producer in the 1412// rescheduled loop. This is enforced by the pipeliner's ASAP and ALAP 1414#ifndef NDEBUG// Silence unused variables in non-asserts mode. 1415int LoopProducerCycle = S.
getCycle(LoopProducer);
1418assert(LoopProducerCycle <= ConsumerCycle);
1419assert(LoopProducerStage == ConsumerStage + 1);
1420// Peel off the first phi from Defaults and insert a phi between producer 1421// and consumer. This phi will not be at the front of the block so we 1422// consider it illegal. It will only exist during the rewrite process; it 1423// needs to exist while we peel off prologs because these could take the 1424// default value. After that we can replace all uses with the loop producer 1426 IllegalPhiDefault = Defaults.
front();
1429assert(ConsumerStage >= LoopProducerStage);
1430int StageDiff = ConsumerStage - LoopProducerStage;
1433 <<
" to " << (Defaults.
size() + StageDiff) <<
"\n");
1434// If we need more phis than we have defaults for, pad out with undefs for 1435// the earliest phis, which are at the end of the defaults chain (the 1436// chain is in reverse order). 1438 Defaults.
empty() ? std::optional<Register>()
1443// Now we know the number of stages to jump back, insert the phi chain. 1444auto DefaultI = Defaults.
rbegin();
1445while (DefaultI != Defaults.
rend())
1446 LoopReg =
phi(LoopReg, *DefaultI++,
MRI.getRegClass(Reg));
1448if (IllegalPhiDefault) {
1449// The consumer optionally consumes LoopProducer in the same iteration 1450// (because the producer is scheduled at an earlier cycle than the consumer) 1451// or the initial value. To facilitate this we create an illegal block here 1452// by embedding a phi in the middle of the block. We will fix this up 1453// immediately prior to pruning. 1454auto RC =
MRI.getRegClass(Reg);
1458 .
addReg(*IllegalPhiDefault)
1459 .
addMBB(PreheaderBB)
// Block choice is arbitrary and has no effect. 1461 .
addMBB(BB);
// Block choice is arbitrary and has no effect. 1462// Illegal phi should belong to the producer stage so that it can be 1463// filtered correctly during peeling. 1464 S.
setStage(IllegalPhi, LoopProducerStage);
1471Register KernelRewriter::phi(
Register LoopReg, std::optional<Register> InitReg,
1473// If the init register is not undef, try and find an existing phi. 1475autoI = Phis.find({LoopReg, *InitReg});
1479for (
auto &KV : Phis) {
1480if (KV.first.first == LoopReg)
1485// InitReg is either undef or no existing phi takes InitReg as input. Try and 1486// find a phi that takes undef as input. 1487autoI = UndefPhis.find(LoopReg);
1488if (
I != UndefPhis.end()) {
1491// Found a phi taking undef as input, and this input is undef so return 1492// without any more changes. 1494// Found a phi taking undef as input, so rewrite it to take InitReg. 1496MI->getOperand(1).setReg(*InitReg);
1497 Phis.insert({{LoopReg, *InitReg},
R});
1499MRI.constrainRegClass(R,
MRI.getRegClass(*InitReg));
1500assert(ConstrainRegClass &&
"Expected a valid constrained register class!");
1501 (void)ConstrainRegClass;
1506// Failed to find any existing phi to reuse, so create a new one. 1508 RC =
MRI.getRegClass(LoopReg);
1512MRI.constrainRegClass(R,
MRI.getRegClass(*InitReg));
1513assert(ConstrainRegClass &&
"Expected a valid constrained register class!");
1514 (void)ConstrainRegClass;
1517 .
addReg(InitReg ? *InitReg : undef(RC))
1522 UndefPhis[LoopReg] =
R;
1524 Phis[{LoopReg, *InitReg}] =
R;
1531// Create an IMPLICIT_DEF that defines this register if we need it. 1532// All uses of this should be removed by the time we have finished unrolling 1533// prologs and epilogs. 1534R =
MRI.createVirtualRegister(RC);
1537TII->get(TargetOpcode::IMPLICIT_DEF), R);
1543/// Describes an operand in the kernel of a pipelined loop. Characteristics of 1544/// the operand are discovered, such as how many in-loop PHIs it has to jump 1545/// through and defaults for these phis. 1546classKernelOperandInfo {
1559while (isRegInLoop(MO)) {
1561if (
MI->isFullCopy()) {
1562 MO = &
MI->getOperand(1);
1567// If this is an illegal phi, don't count it in distance. 1569 MO = &
MI->getOperand(3);
1574 MO =
MI->getOperand(2).getMBB() == BB ? &
MI->getOperand(1)
1575 : &
MI->getOperand(3);
1582return PhiDefaults.
size() ==
Other.PhiDefaults.size();
1586OS <<
"use of " << *
Source <<
": distance(" << PhiDefaults.
size() <<
") in " 1593MRI.getVRegDef(MO->
getReg())->getParent() == BB;
1621if (Stage == -1 || Stage >= MinStage)
1627// Only PHIs can use values from this block by construction. 1628// Match with the equivalent PHI in B. 1634for (
auto &Sub : Subs)
1635 Sub.first->substituteRegister(DefMO.getReg(), Sub.second,
/*SubIdx=*/0,
1640MI->eraseFromParent();
1651// This is an illegal PHI. If we move any instructions using an illegal 1652// PHI, we need to create a legal Phi. 1654// The legal Phi is not necessary if the illegal phi's stage 1670MI.removeFromParent();
1674BlockMIs.erase({SourceBB, KernelMI});
1680// If the instruction referenced by the phi is moved inside the block 1681// we don't need the phi anymore. 1684assert(Def->findRegisterDefOperandIdx(
MI.getOperand(1).getReg(),
1685/*TRI=*/nullptr) != -1);
1687MI.getOperand(0).setReg(PhiReg);
1691for (
auto *
P : PhiToDelete)
1692P->eraseFromParent();
1694// Helper to clone Phi instructions into the destination block. We clone Phi 1695// greedily to avoid combinatorial explosion of Phi instructions. 1698 DestBB->
insert(InsertPt, NewMI);
1699Register OrigR = Phi->getOperand(0).getReg();
1717// If we are using a phi from the source block we need to add a new phi 1718// pointing to the old one. 1720if (
Use &&
Use->isPHI() &&
Use->getParent() == SourceBB) {
1735for (
unsignedI = 0;
I < distance; ++
I) {
1738unsigned LoopRegIdx = 3, InitRegIdx = 1;
1744return CanonicalUseReg;
1753// Peel out the prologs. 1762// Create a block that will end up as the new loop exiting block (dominated by 1763// all prologs and epilogs). It will only contain PHIs, in the same order as 1764// BB's PHIs. This gives us a poor-man's LCSSA with the inductive property 1765// that the exiting block is a (sub) clone of BB. This in turn gives us the 1766// property that any value deffed in BB but used outside of BB is used by a 1767// PHI in the exiting block. 1769 EliminateDeadPhis(ExitingBB,
MRI,
LIS,
/*KeepSingleSrcPhi=*/true);
1770// Push out the epilogs, again in reverse order. 1771// We can't assume anything about the minumum loop trip count at this point, 1772// so emit a fairly complex epilog. 1774// We first peel number of stages minus one epilogue. Then we remove dead 1775// stages and reorder instructions based on their stage. If we have 3 stages 1776// we generate first: 1780// And then we move instructions based on their stages to have: 1784// The transformation is legal because we only move instructions past 1785// instructions of a previous loop iteration. 1790// Keep track at which iteration each phi belongs to. We need it to know 1791// what version of the variable to use during prologue/epilogue stitching. 1792 EliminateDeadPhis(
B,
MRI,
LIS,
/*KeepSingleSrcPhi=*/true);
1798for (
size_t J =
I; J <
Epilogs.size(); J++) {
1801// Move stage one block at a time so that Phi nodes are updated correctly. 1802for (
size_t K = Iteration; K >
I; K--)
1810// Now we've defined all the prolog and epilog blocks as a fallthrough 1811// sequence, add the edges that will be followed if the loop trip count is 1812// lower than the number of stages (connecting prologs directly with epilogs). 1816for (; PI !=
Prologs.end(); ++PI, ++EI) {
1818 (*PI)->addSuccessor(*EI);
1822if (
Use &&
Use->getParent() == Pred) {
1824if (CanonicalUse->
isPHI()) {
1825// If the use comes from a phi we need to skip as many phi as the 1826// distance between the epilogue and the kernel. Trace through the phi 1827// chain to find the right value. 1837// Create a list of all blocks in order. 1843// Iterate in reverse order over all instructions, remapping as we go. 1845for (
autoI =
B->instr_rbegin();
1846I != std::next(
B->getFirstNonPHI()->getReverseIterator());) {
1854MI->eraseFromParent();
1858// Now all remapping has been done, we're free to optimize the generated code. 1861 EliminateDeadPhis(ExitingBB,
MRI,
LIS);
1873// Clone all phis in BB into NewBB and rewrite. 1880if (
Use.getParent() !=
BB)
1883Use->substituteRegister(OldR, R,
/*SubIdx=*/0,
1892 Exit->replacePhiUsesWith(
BB, NewBB);
1899assert(CanAnalyzeBr &&
"Must be able to analyze the loop branch!");
1911unsigned OpIdx =
MI->findRegisterDefOperandIdx(Reg,
/*TRI=*/nullptr);
1917// This is an illegal PHI. The loop-carried (desired) value is operand 3, 1918// and it is produced by this block. 1923 R =
MI->getOperand(1).getReg();
1926// Postpone deleting the Phi as it may be referenced by BlockMIs and used 1927// later to figure out how to remap registers. 1928MI->getOperand(0).setReg(PhiR);
1934if (Stage == -1 ||
LiveStages.count(
MI->getParent()) == 0 ||
1936// Instruction is live, no rewriting to do. 1942// Only PHIs can use values from this block by construction. 1943// Match with the equivalent PHI in B. 1949for (
auto &Sub : Subs)
1950 Sub.first->substituteRegister(DefMO.getReg(), Sub.second,
/*SubIdx=*/0,
1955MI->eraseFromParent();
1959// Work outwards from the kernel. 1960bool KernelDisposed =
false;
1969 std::optional<bool> StaticallyGreater =
1971if (!StaticallyGreater) {
1973// Dynamically branch based on Cond. 1975 }
elseif (*StaticallyGreater ==
false) {
1977// Prolog never falls through; branch to epilog and orphan interior 1978// blocks. Leave it to unreachable-block-elim to clean up. 1979Prolog->removeSuccessor(Fallthrough);
1985 KernelDisposed =
true;
1988// Prolog always falls through; remove incoming values in epilog. 1997if (!KernelDisposed) {
2026// Dump the schedule before we invalidate and remap all its instructions. 2027// Stash it in a string so we can print it if we found an error. 2028 std::string ScheduleDump;
2033// First, run the normal ModuleScheduleExpander. We don't support any 2040if (!ExpandedKernel) {
2041// The expander optimized away the kernel. We can't do any useful checking. 2045// Before running the KernelRewriter, re-add BB into the CFG. 2048// Now run the new expansion algorithm. 2053// Collect all illegal phis that the new algorithm created. We'll give these 2054// to KernelOperandInfo. 2058 IllegalPhis.
insert(&*NI);
2061// Co-iterate across both kernels. We expect them to be identical apart from 2062// phis and full COPYs (we look through both). 2064auto OI = ExpandedKernel->
begin();
2066for (; !OI->isTerminator() && !NI->isTerminator(); ++OI, ++NI) {
2067while (OI->isPHI() || OI->isFullCopy())
2069while (NI->isPHI() || NI->isFullCopy())
2071assert(OI->getOpcode() == NI->getOpcode() &&
"Opcodes don't match?!");
2072// Analyze every operand separately. 2073for (
auto OOpI = OI->operands_begin(), NOpI = NI->operands_begin();
2074 OOpI != OI->operands_end(); ++OOpI, ++NOpI)
2076 KernelOperandInfo(&*NOpI,
MRI, IllegalPhis));
2080for (
auto &OldAndNew : KOIs) {
2081if (OldAndNew.first == OldAndNew.second)
2084errs() <<
"Modulo kernel validation error: [\n";
2085errs() <<
" [golden] ";
2086 OldAndNew.first.print(
errs());
2088 OldAndNew.second.print(
errs());
2093errs() <<
"Golden reference kernel:\n";
2095errs() <<
"New kernel:\n";
2097errs() << ScheduleDump;
2099"Modulo kernel validation (-pipeliner-experimental-cg) failed");
2102// Cleanup by removing BB from the CFG again as the original 2103// ModuloScheduleExpander intended. 2111// TODO: Offset information needs to be corrected. 2117/// Create a dedicated exit for Loop. Exit is the original exit for Loop. 2118/// If it is already dedicated exit, return it. Otherwise, insert a new 2119/// block between them and return the new block. 2122if (Exit->pred_size() == 1)
2143Loop->replaceSuccessor(Exit, NewExit);
2144TII->insertUnconditionalBranch(*NewExit, Exit,
DebugLoc());
2147 Exit->replacePhiUsesWith(
Loop, NewExit);
2152/// Insert branch code into the end of MBB. It branches to GreaterThan if the 2153/// remaining trip count for instructions in LastStage0Insts is greater than 2154/// RequiredTC, and to Otherwise otherwise. 2157 InstrMapTy &LastStage0Insts,
2161LoopInfo->createRemainingIterationsGreaterCondition(RequiredTC,
MBB,
Cond,
2165// Set SwapBranchTargetsMVE to true if a target prefers to replace TBB and 2166// FBB for optimal performance. 2175/// Generate a pipelined loop that is unrolled by using MVE algorithm and any 2176/// other necessary blocks. The control flow is modified to execute the 2177/// pipelined loop if the trip count satisfies the condition, otherwise the 2178/// original loop. The original loop is also used to execute the remainder 2179/// iterations which occur due to unrolling. 2180void ModuloScheduleExpanderMVE::generatePipelinedLoop() {
2181// The control flow for pipelining with MVE: 2184// // The block that is originally the loop preheader 2188// // Check whether the trip count satisfies the requirements to pipeline. 2189// if (LoopCounter > NumStages + NumUnroll - 2) 2190// // The minimum number of iterations to pipeline = 2191// // iterations executed in prolog/epilog (NumStages-1) + 2192// // iterations executed in one kernel run (NumUnroll) 2194// // fallback to the original loop 2198// // All prolog stages. There are no direct branches to the epilogue. 2202// // NumUnroll copies of the kernel 2203// if (LoopCounter > MVE-1) 2208// // All epilog stages. 2209// if (LoopCounter > 0) 2210// // The remainder is executed in the original loop 2215// // Newly created preheader for the original loop. 2216// // The initial values of the phis in the loop are merged from two paths. 2217// NewInitVal = Phi OrigInitVal, Check, PipelineLastVal, Epilog 2221// // The original loop block. 2222// if (LoopCounter != 0) 2227// // Newly created dedicated exit for the original loop. 2228// // Merge values which are referenced after the loop 2229// Merged = Phi OrigVal, OrigKernel, PipelineVal, Epilog 2233// // The block that is originally the loop exit. 2234// // If it is already deicated exit, NewExit is not created. 2236// An example of where each stage is executed: 2237// Assume #Stages 3, #MVE 4, #Iterations 12 2238// Iter 0 1 2 3 4 5 6 7 8 9 10-11 2239// ------------------------------------------------- 2241// Stage 1 0 Prolog#1 2242// Stage 2 1 0 Kernel Unroll#0 Iter#0 2243// Stage 2 1 0 Kernel Unroll#1 Iter#0 2244// Stage 2 1 0 Kernel Unroll#2 Iter#0 2245// Stage 2 1 0 Kernel Unroll#3 Iter#0 2246// Stage 2 1 0 Kernel Unroll#0 Iter#1 2247// Stage 2 1 0 Kernel Unroll#1 Iter#1 2248// Stage 2 1 0 Kernel Unroll#2 Iter#1 2249// Stage 2 1 0 Kernel Unroll#3 Iter#1 2250// Stage 2 1 Epilog#0 2252// Stage 0-2 OrigKernel 2283Prolog->addSuccessor(NewKernel);
2288Epilog->addSuccessor(NewPreheader);
2289Epilog->addSuccessor(NewExit);
2291 InstrMapTy LastStage0Insts;
2292 insertCondBranch(*Check, Schedule.
getNumStages() + NumUnroll - 2,
2293 LastStage0Insts, *Prolog, *NewPreheader);
2295// VRMaps map (prolog/kernel/epilog phase#, original register#) to new 2298 generateProlog(PrologVRMap);
2299 generateKernel(PrologVRMap, KernelVRMap, LastStage0Insts);
2300 generateEpilog(KernelVRMap, EpilogVRMap, LastStage0Insts);
2303/// Replace MI's use operands according to the maps. 2304void ModuloScheduleExpanderMVE::updateInstrUse(
2308// If MI is in the prolog/kernel/epilog block, CurVRMap is 2309// PrologVRMap/KernelVRMap/EpilogVRMap respectively. 2310// PrevVRMap is nullptr/PhiVRMap/KernelVRMap respectively. 2311// Refer to the appropriate map according to the stage difference between 2312// MI and the definition of an operand. 2315if (!UseMO.isReg() || !UseMO.getReg().isVirtual())
2320if (!DefInst || DefInst->
getParent() != OrigKernel)
2322unsigned InitReg = 0;
2323unsigned DefReg = OrigReg;
2324if (DefInst->
isPHI()) {
2327getPhiRegs(*DefInst, OrigKernel, InitReg, LoopReg);
2328// LoopReg is guaranteed to be defined within the loop by canApply() 2330 DefInst =
MRI.getVRegDef(LoopReg);
2332unsigned DefStageNum = Schedule.
getStage(DefInst);
2333 DiffStage += StageNum - DefStageNum;
2335if (PhaseNum >= DiffStage && CurVRMap[PhaseNum - DiffStage].
count(DefReg))
2336// NewReg is defined in a previous phase of the same block 2337 NewReg = CurVRMap[PhaseNum - DiffStage][DefReg];
2339// Since this is the first iteration, refer the initial register of the 2343// Cases where DiffStage is larger than PhaseNum. 2344// If MI is in the kernel block, the value is defined by the previous 2345// iteration and PhiVRMap is referenced. If MI is in the epilog block, the 2346// value is defined in the kernel block and KernelVRMap is referenced. 2347 NewReg = (*PrevVRMap)[PrevVRMap->
size() - (DiffStage - PhaseNum)][DefReg];
2350MRI.constrainRegClass(NewReg,
MRI.getRegClass(OrigReg));
2352 UseMO.setReg(NewReg);
2354Register SplitReg =
MRI.createVirtualRegister(
MRI.getRegClass(OrigReg));
2355BuildMI(*OrigKernel,
MI,
MI->getDebugLoc(), TII->
get(TargetOpcode::COPY),
2358 UseMO.setReg(SplitReg);
2363/// Return a phi if Reg is referenced by the phi. 2364/// canApply() guarantees that at most only one such phi exists. 2367unsigned InitVal, LoopVal;
2375/// Generate phis for registers defined by OrigMI. 2376void ModuloScheduleExpanderMVE::generatePhi(
2381int StageNum = Schedule.
getStage(OrigMI);
2383if (Schedule.
getNumStages() - NumUnroll + UnrollNum - 1 >= StageNum)
2385elseif (Schedule.
getNumStages() - NumUnroll + UnrollNum == StageNum)
2386 UsePrologReg =
false;
2390// Examples that show which stages are merged by phi. 2391// Meaning of the symbol following the stage number: 2392// a/b: Stages with the same letter are merged (UsePrologReg == true) 2393// +: Merged with the initial value (UsePrologReg == false) 2394// *: No phis required 2397// Iter 0 1 2 3 4 5 6 7 8 2398// ----------------------------------------- 2400// Stage 1a 0b Prolog#1 2401// Stage 2* 1* 0* Kernel Unroll#0 2402// Stage 2* 1* 0+ Kernel Unroll#1 2403// Stage 2* 1+ 0a Kernel Unroll#2 2404// Stage 2+ 1a 0b Kernel Unroll#3 2407// Iter 0 1 2 3 4 5 6 7 8 2408// ----------------------------------------- 2410// Stage 1a 0b Prolog#1 2411// Stage 2* 1+ 0a Kernel Unroll#0 2412// Stage 2+ 1a 0b Kernel Unroll#1 2415// Iter 0 1 2 3 4 5 6 7 8 2416// ----------------------------------------- 2418// Stage 1a 0b Prolog#1 2419// Stage 2+ 1a 0b Kernel Unroll#0 2422if (!DefMO.isReg() || DefMO.isDead())
2425auto NewReg = KernelVRMap[UnrollNum].find(OrigReg);
2426if (NewReg == KernelVRMap[UnrollNum].
end())
2430int PrologNum = Schedule.
getNumStages() - NumUnroll + UnrollNum - 1;
2431 CorrespondReg = PrologVRMap[PrologNum][OrigReg];
2440Register PhiReg =
MRI.createVirtualRegister(
MRI.getRegClass(OrigReg));
2442 TII->
get(TargetOpcode::PHI), PhiReg)
2447 PhiVRMap[UnrollNum][OrigReg] = PhiReg;
2453for (
unsignedIdx = 1;
Idx < Phi.getNumOperands();
Idx += 2) {
2454if (Phi.getOperand(
Idx).getReg() == OrigReg) {
2455 Phi.getOperand(
Idx).setReg(NewReg);
2456 Phi.getOperand(
Idx + 1).setMBB(NewMBB);
2462/// Generate phis that merge values from multiple routes 2463void ModuloScheduleExpanderMVE::mergeRegUsesAfterPipeline(
Register OrigReg,
2471if (
O.getParent()->getParent() != OrigKernel &&
2472O.getParent()->getParent() != Prolog &&
2473O.getParent()->getParent() != NewKernel &&
2474O.getParent()->getParent() != Epilog)
2476if (
O.getParent()->getParent() == OrigKernel &&
O.getParent()->isPHI())
2480// Merge the route that only execute the pipelined loop (when there are no 2481// remaining iterations) with the route that execute the original loop. 2482if (!UsesAfterLoop.
empty()) {
2483Register PhiReg =
MRI.createVirtualRegister(
MRI.getRegClass(OrigReg));
2485 TII->
get(TargetOpcode::PHI), PhiReg)
2498// Merge routes from the pipelined loop and the bypassed route before the 2500if (!LoopPhis.
empty()) {
2502unsigned InitReg, LoopReg;
2503getPhiRegs(*Phi, OrigKernel, InitReg, LoopReg);
2504Register NewInit =
MRI.createVirtualRegister(
MRI.getRegClass(InitReg));
2506 TII->
get(TargetOpcode::PHI), NewInit)
2516void ModuloScheduleExpanderMVE::generateProlog(
2518 PrologVRMap.
clear();
2521for (
int PrologNum = 0; PrologNum < Schedule.
getNumStages() - 1;
2527if (StageNum > PrologNum)
2530 updateInstrDef(NewMI, PrologVRMap[PrologNum],
false);
2531 NewMIMap[NewMI] = {PrologNum, StageNum};
2536for (
autoI : NewMIMap) {
2538int PrologNum =
I.second.first;
2539int StageNum =
I.second.second;
2540 updateInstrUse(
MI, StageNum, PrologNum, PrologVRMap,
nullptr);
2544dbgs() <<
"prolog:\n";
2549void ModuloScheduleExpanderMVE::generateKernel(
2552 KernelVRMap.
clear();
2553 KernelVRMap.
resize(NumUnroll);
2555 PhiVRMap.
resize(NumUnroll);
2558for (
int UnrollNum = 0; UnrollNum < NumUnroll; ++UnrollNum) {
2564if (UnrollNum == NumUnroll - 1)
2565 LastStage0Insts[
MI] = NewMI;
2566 updateInstrDef(NewMI, KernelVRMap[UnrollNum],
2567 (UnrollNum == NumUnroll - 1 && StageNum == 0));
2568 generatePhi(
MI, UnrollNum, PrologVRMap, KernelVRMap, PhiVRMap);
2569 NewMIMap[NewMI] = {UnrollNum, StageNum};
2574for (
autoI : NewMIMap) {
2576int UnrollNum =
I.second.first;
2577int StageNum =
I.second.second;
2578 updateInstrUse(
MI, StageNum, UnrollNum, KernelVRMap, &PhiVRMap);
2581// If remaining trip count is greater than NumUnroll-1, loop continues 2582 insertCondBranch(*NewKernel, NumUnroll - 1, LastStage0Insts, *NewKernel,
2586dbgs() <<
"kernel:\n";
2591void ModuloScheduleExpanderMVE::generateEpilog(
2594 EpilogVRMap.
clear();
2597for (
int EpilogNum = 0; EpilogNum < Schedule.
getNumStages() - 1;
2603if (StageNum <= EpilogNum)
2606 updateInstrDef(NewMI, EpilogVRMap[EpilogNum], StageNum - 1 == EpilogNum);
2607 NewMIMap[NewMI] = {EpilogNum, StageNum};
2612for (
autoI : NewMIMap) {
2614int EpilogNum =
I.second.first;
2615int StageNum =
I.second.second;
2616 updateInstrUse(
MI, StageNum, EpilogNum, EpilogVRMap, &KernelVRMap);
2619// If there are remaining iterations, they are executed in the original loop. 2620// Instructions related to loop control, such as loop counter comparison, 2621// are indicated by shouldIgnoreForPipelining() and are assumed to be placed 2622// in stage 0. Thus, the map is for the last one in the kernel. 2623 insertCondBranch(*Epilog, 0, LastStage0Insts, *NewPreheader, *NewExit);
2626dbgs() <<
"epilog:\n";
2631/// Calculate the number of unroll required and set it to NumUnroll 2632void ModuloScheduleExpanderMVE::calcNumUnroll() {
2649int NumUnrollLocal = 1;
2652// canApply() guarantees that DefMI is not phi and is an instruction in 2657if (Inst2Idx[
MI] <= Inst2Idx[
DefMI])
2659 NumUnroll = std::max(NumUnroll, NumUnrollLocal);
2665/// Create new virtual registers for definitions of NewMI and update NewMI. 2666/// If the definitions are referenced after the pipelined loop, phis are 2667/// created to merge with other routes. 2668void ModuloScheduleExpanderMVE::updateInstrDef(
MachineInstr *NewMI,
2678 VRMap[
Reg] = NewReg;
2680 mergeRegUsesAfterPipeline(Reg, NewReg);
2691 generatePipelinedLoop();
2694/// Check if ModuloScheduleExpanderMVE can be applied to L 2696if (!L.getExitBlock()) {
2697LLVM_DEBUG(
dbgs() <<
"Can not apply MVE expander: No single exit block.\n");
2704// Put some constraints on the operands of the phis to simplify the 2708// Registers defined by phis must be used only inside the loop and be never 2713if (
Ref.getParent() != BB ||
Ref.isPHI()) {
2714LLVM_DEBUG(
dbgs() <<
"Can not apply MVE expander: A phi result is " 2715"referenced outside of the loop or by phi.\n");
2719// A source register from the loop block must be defined inside the loop. 2720// A register defined inside the loop must be referenced by only one phi at 2722unsigned InitVal, LoopVal;
2724if (!
Register(LoopVal).isVirtual() ||
2725MRI.getVRegDef(LoopVal)->getParent() != BB) {
2727dbgs() <<
"Can not apply MVE expander: A phi source value coming " 2728"from the loop is not defined in the loop.\n");
2731if (UsedByPhi.
count(LoopVal)) {
2732LLVM_DEBUG(
dbgs() <<
"Can not apply MVE expander: A value defined in the " 2733"loop is referenced by two or more phis.\n");
2736 UsedByPhi.
insert(LoopVal);
2742//===----------------------------------------------------------------------===// 2743// ModuloScheduleTestPass implementation 2744//===----------------------------------------------------------------------===// 2745// This pass constructs a ModuloSchedule from its module and runs 2746// ModuloScheduleExpander. 2748// The module is expected to contain a single-block analyzable loop. 2749// The total order of instructions is taken from the loop as-is. 2750// Instructions are expected to be annotated with a PostInstrSymbol. 2751// This PostInstrSymbol must have the following format: 2752// "Stage=%d Cycle=%d". 2753//===----------------------------------------------------------------------===// 2775char ModuloScheduleTest::ID = 0;
2778"Modulo Schedule test pass",
false,
false)
2785MachineLoopInfo &MLI = getAnalysis<MachineLoopInfoWrapperPass>().getLI();
2786for (
auto *L : MLI) {
2787if (L->getTopBlock() != L->getBottomBlock())
2796 std::pair<StringRef, StringRef> StageAndCycle = getToken(S,
"_");
2797 std::pair<StringRef, StringRef> StageTokenAndValue =
2798 getToken(StageAndCycle.first,
"-");
2799 std::pair<StringRef, StringRef> CycleTokenAndValue =
2800 getToken(StageAndCycle.second,
"-");
2801if (StageTokenAndValue.first !=
"Stage" ||
2802 CycleTokenAndValue.first !=
"_Cycle") {
2804"Bad post-instr symbol syntax: see comment in ModuloScheduleTest");
2808 StageTokenAndValue.second.drop_front().getAsInteger(10, Stage);
2809 CycleTokenAndValue.second.drop_front().getAsInteger(10,
Cycle);
2811dbgs() <<
" Stage=" << Stage <<
", Cycle=" <<
Cycle <<
"\n";
2815LiveIntervals &LIS = getAnalysis<LiveIntervalsWrapperPass>().getLIS();
2817dbgs() <<
"--- ModuloScheduleTest running on BB#" << BB->
getNumber() <<
"\n";
2820 std::vector<MachineInstr *> Instrs;
2822if (
MI.isTerminator())
2824 Instrs.push_back(&
MI);
2826dbgs() <<
"Parsing post-instr symbol for " <<
MI;
2839//===----------------------------------------------------------------------===// 2840// ModuloScheduleTestAnnotater implementation 2841//===----------------------------------------------------------------------===// 2849MI->setPostInstrSymbol(MF,
Sym);
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static const Function * getParent(const Value *V)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
std::optional< std::vector< StOtherPiece > > Other
DenseMap< Block *, BlockRelaxAux > Blocks
global merge Global merge function pass
const HexagonInstrInfo * TII
static unsigned getLoopPhiReg(const MachineInstr &Phi, const MachineBasicBlock *LoopBB)
Return the Phi register value that comes the loop block.
static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, unsigned &InitVal, unsigned &LoopVal)
Return the register values for the operands of a Phi instruction.
unsigned const TargetRegisterInfo * TRI
This file provides utility analysis objects describing memory locations.
static MachineBasicBlock * createDedicatedExit(MachineBasicBlock *Loop, MachineBasicBlock *Exit)
Create a dedicated exit for Loop.
static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming)
Remove the incoming block from the Phis in a basic block.
static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes the loop block.
static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg, MachineBasicBlock *MBB, MachineRegisterInfo &MRI, LiveIntervals &LIS)
Replace all uses of FromReg that appear outside the specified basic block with ToReg.
static void replacePhiSrc(MachineInstr &Phi, Register OrigReg, Register NewReg, MachineBasicBlock *NewMBB)
static MachineInstr * getLoopPhiUser(Register Reg, MachineBasicBlock *Loop)
Return a phi if Reg is referenced by the phi.
static void parseSymbolString(StringRef S, int &Cycle, int &Stage)
static cl::opt< bool > SwapBranchTargetsMVE("pipeliner-swap-branch-targets-mve", cl::Hidden, cl::init(false), cl::desc("Swap target blocks of a conditional branch for MVE expander"))
static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB, MachineRegisterInfo &MRI)
Return true if the register has a use that occurs outside the specified loop.
static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes from the incoming block.
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some functions that are useful when dealing with strings.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
This class represents an Operation in the Expression.
iterator find(const_arg_type_t< KeyT > Val)
DenseMapIterator< KeyT, ValueT, KeyInfoT, BucketT > iterator
Implements a dense probed hash-table based set.
A possibly irreducible generalization of a Loop.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
Remove the branching code at the end of the specific MBB.
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
Insert branch code into the end of the specified MachineBasicBlock.
bool hasInterval(Register Reg) const
void insertMBBInMaps(MachineBasicBlock *MBB)
void RemoveMachineInstrFromMaps(MachineInstr &MI)
LiveInterval & createEmptyInterval(Register Reg)
Interval creation.
static constexpr LocationSize beforeOrAfterPointer()
Any location before or after the base pointer (but still within the underlying object).
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Represents a single loop in the control flow graph.
MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void replacePhiUsesWith(MachineBasicBlock *Old, MachineBasicBlock *New)
Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.
instr_iterator instr_begin()
void replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New)
Replace successor OLD with NEW and update probability info.
void transferSuccessors(MachineBasicBlock *FromMBB)
Transfers all the successors from MBB to this machine basic block (i.e., copies all the successors Fr...
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
iterator_range< iterator > phis()
Returns a range that iterates over the phis in the basic block.
reverse_instr_iterator instr_rbegin()
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
void push_back(MachineInstr *MI)
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
succ_iterator succ_begin()
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
SmallVectorImpl< MachineBasicBlock * >::iterator succ_iterator
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
void print(raw_ostream &OS, const SlotIndexes *=nullptr, bool IsStandalone=true) const
reverse_instr_iterator instr_rend()
Instructions::iterator instr_iterator
pred_iterator pred_begin()
void eraseFromParent()
This method unlinks 'this' from the containing function and deletes it.
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< iterator > terminators()
instr_iterator getFirstInstrTerminator()
Same getFirstTerminator but it ignores bundles and return an instr_iterator instead.
Instructions::reverse_iterator reverse_instr_iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
const MachineBasicBlock & front() const
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
const MachineBasicBlock * getParent() const
unsigned getNumOperands() const
Retuns the total number of operands.
bool memoperands_empty() const
Return true if we don't have any memory operands which described the memory access done by this instr...
void setMemRefs(MachineFunction &MF, ArrayRef< MachineMemOperand * > MemRefs)
Assign this MachineInstr's memory reference descriptor list.
void dropMemRefs(MachineFunction &MF)
Clear this MachineInstr's memory reference descriptor list.
iterator_range< mop_iterator > operands()
iterator_range< mop_iterator > defs()
Returns a range over all explicit operands that are register definitions.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
iterator_range< filtered_mop_iterator > all_defs()
Returns an iterator range over all operands that are (explicit or implicit) register defs.
MachineBasicBlock * getTopBlock()
Return the "top" block in the loop, which is the first block in the linear layout,...
A description of a memory reference used in the backend.
MachineOperand class - Representation of each machine instruction operand.
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
void setReg(Register Reg)
Change the register this operand corresponds to.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setMBB(MachineBasicBlock *MBB)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
static MachineOperand CreateMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0)
defusechain_iterator - This class provides iterator support for machine operands in the function that...
reg_begin/reg_end - Provide iteration support to walk over all definitions and uses of a register wit...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
use_instr_iterator use_instr_begin(Register RegNo) const
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
static use_instr_iterator use_instr_end()
void setRegClass(Register Reg, const TargetRegisterClass *RC)
setRegClass - Set the register class of the specified virtual register.
iterator_range< use_instr_iterator > use_instructions(Register Reg) const
const TargetRegisterInfo * getTargetRegisterInfo() const
use_iterator use_begin(Register RegNo) const
static use_iterator use_end()
iterator_range< use_iterator > use_operands(Register Reg) const
void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
static bool canApply(MachineLoop &L)
Check if ModuloScheduleExpanderMVE can be applied to L.
The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place, rewriting the old loop and...
MachineBasicBlock * getRewrittenKernel()
Returns the newly rewritten kernel block, or nullptr if this was optimized away.
void cleanup()
Performs final cleanup after expansion.
void expand()
Performs the actual expansion.
void annotate()
Performs the annotation.
Represents a schedule for a single-block loop.
int getNumStages() const
Return the number of stages contained in this schedule, which is the largest stage index + 1.
MachineLoop * getLoop() const
Return the single-block loop being scheduled.
ArrayRef< MachineInstr * > getInstructions()
Return the rescheduled instructions in order.
void print(raw_ostream &OS)
int getCycle(MachineInstr *MI)
Return the cycle that MI is scheduled at, or -1.
void setStage(MachineInstr *MI, int MIStage)
Set the stage of a newly created instruction.
int getStage(MachineInstr *MI)
Return the stage that MI is scheduled in, or -1.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
std::deque< MachineBasicBlock * > PeeledBack
SmallVector< MachineInstr *, 4 > IllegalPhisToDelete
Illegal phis that need to be deleted once we re-link stages.
DenseMap< MachineInstr *, MachineInstr * > CanonicalMIs
CanonicalMIs and BlockMIs form a bidirectional map between any of the loop kernel clones.
SmallVector< MachineBasicBlock *, 4 > Prologs
All prolog and epilog blocks.
MachineBasicBlock * peelKernel(LoopPeelDirection LPD)
Peels one iteration of the rewritten kernel (BB) in the specified direction.
ModuloSchedule & Schedule
std::deque< MachineBasicBlock * > PeeledFront
State passed from peelKernel to peelPrologAndEpilogs().
unsigned getStage(MachineInstr *MI)
Helper to get the stage of an instruction in the schedule.
void rewriteUsesOf(MachineInstr *MI)
Change all users of MI, if MI is predicated out (LiveStages[MI->getParent()] == false).
SmallVector< MachineBasicBlock *, 4 > Epilogs
DenseMap< MachineBasicBlock *, BitVector > AvailableStages
For every block, the stages that are available.
DenseMap< std::pair< MachineBasicBlock *, MachineInstr * >, MachineInstr * > BlockMIs
Register getEquivalentRegisterIn(Register Reg, MachineBasicBlock *BB)
All prolog and epilog blocks are clones of the kernel, so any produced register in one block has an c...
MachineBasicBlock * Preheader
The original loop preheader.
void rewriteKernel()
Converts BB from the original loop body to the rewritten, pipelined steady-state.
DenseMap< MachineInstr *, unsigned > PhiNodeLoopIteration
When peeling the epilogue keep track of the distance between the phi nodes and the kernel.
DenseMap< MachineBasicBlock *, BitVector > LiveStages
For every block, the stages that are produced.
const TargetInstrInfo * TII
void filterInstructions(MachineBasicBlock *MB, int MinStage)
void peelPrologAndEpilogs()
Peel the kernel forwards and backwards to produce prologs and epilogs, and stitch them together.
MachineBasicBlock * BB
The original loop block that gets rewritten in-place.
void fixupBranches()
Insert branches between prologs, kernel and epilogs.
MachineBasicBlock * CreateLCSSAExitingBlock()
Create a poor-man's LCSSA by cloning only the PHIs from the kernel block to a block dominated by all ...
void validateAgainstModuloScheduleExpander()
Runs ModuloScheduleExpander and treats it as a golden input to validate aspects of the code generated...
Register getPhiCanonicalReg(MachineInstr *CanonicalPhi, MachineInstr *Phi)
Helper function to find the right canonical register for a phi instruction coming from a peeled out p...
MachineRegisterInfo & MRI
void moveStageBetweenBlocks(MachineBasicBlock *DestBB, MachineBasicBlock *SourceBB, unsigned Stage)
Wrapper class representing virtual and physical registers.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
iterator erase(const_iterator CI)
void push_back(const T &Elt)
reverse_iterator rbegin()
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const
Reverses the branch condition of the specified condition list, returning false on success and true if...
virtual unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const
Remove the branching code at the end of the specific MBB.
virtual std::unique_ptr< PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const
Analyze loop L, which must be a single-basic-block loop, and if the conditions can be understood enou...
virtual bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
virtual bool getBaseAndOffsetPosition(const MachineInstr &MI, unsigned &BasePos, unsigned &OffsetPos) const
Return true if the instruction contains a base register and offset.
virtual unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const
Insert branch code into the end of the specified MachineBasicBlock.
unsigned insertUnconditionalBranch(MachineBasicBlock &MBB, MachineBasicBlock *DestBB, const DebugLoc &DL, int *BytesAdded=nullptr) const
virtual bool getIncrementValue(const MachineInstr &MI, int &Value) const
If the instruction is an increment of a constant value, return the amount.
bool getMemOperandWithOffset(const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const
Get the base operand and byte offset of an instruction that reads/writes memory.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
Target - Wrapper for Target specific information.
A Use represents the edge between a Value definition and its users.
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
self_iterator getIterator()
This class implements an extremely fast bulk output stream that can only output to a stream.
A raw_ostream that writes to an std::string.
A raw_ostream that writes to an SmallVector or SmallString.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
NodeAddr< PhiNode * > Phi
NodeAddr< DefNode * > Def
const_iterator end(StringRef path LLVM_LIFETIME_BOUND)
Get end iterator over path.
This is an optimization pass for GlobalISel generic memory operations.
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
MachineBasicBlock * PeelSingleBlockLoop(LoopPeelDirection Direction, MachineBasicBlock *Loop, MachineRegisterInfo &MRI, const TargetInstrInfo *TII)
Peels a single block loop.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
testing::Matcher< const detail::ErrorHolder & > Failed()
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
auto reverse(ContainerTy &&C)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
@ Ref
The access may reference the value stored in memory.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
OutputIt copy(R &&Range, OutputIt Out)
void initializeModuloScheduleTestPass(PassRegistry &)
@ LPD_Back
Peel the last iteration of the loop.
@ LPD_Front
Peel the first iteration of the loop.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Incoming for lane maks phi as machine instruction, incoming register Reg and incoming block Block are...