//===-------------- GCNRewritePartialRegUses.cpp --------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// RenameIndependentSubregs pass leaves large partially used super registers,
/// for example:
///   undef %0.sub4:VReg_1024 = ...
///   %0.sub5:VReg_1024 = ...
///   %0.sub6:VReg_1024 = ...
///   %0.sub7:VReg_1024 = ...
///   use %0.sub4_sub5_sub6_sub7
///
/// GCNRewritePartialRegUses goes right after RenameIndependentSubregs and
/// rewrites such partially used super registers with registers of minimal size:
///   undef %0.sub0:VReg_128 = ...
///   %0.sub1:VReg_128 = ...
///   %0.sub2:VReg_128 = ...
///   %0.sub3:VReg_128 = ...
///   use %0.sub0_sub1_sub2_sub3
///
/// This allows us to avoid tracking subreg lanemasks during register pressure
/// calculation and creates more opportunities for code that is unaware of
/// lanemasks.
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIRegisterInfo.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <limits>

using namespace llvm;

#define DEBUG_TYPE "rewrite-partial-reg-uses"

namespace {

class GCNRewritePartialRegUses : public MachineFunctionPass {
public:
  static char ID;
  GCNRewritePartialRegUses() : MachineFunctionPass(ID) {}

  StringRef getPassName() const override {
    return "Rewrite Partial Register Uses";
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

private:
  MachineRegisterInfo *MRI;
  const SIRegisterInfo *TRI;
  const TargetInstrInfo *TII;
  LiveIntervals *LIS;
  /// Rewrite partially used register Reg by shifting all its subregisters to
  /// the right and replacing the original register with a register of minimal
  /// size. Return true if the change has been made.
  bool rewriteReg(Register Reg) const;

  /// Value type for SubRegMap below.
  struct SubRegInfo {
    /// Register class required to hold the value stored in the SubReg.
    const TargetRegisterClass *RC = nullptr;

    /// Index for the right-shifted subregister. If 0 this is the "covering"
    /// subreg, i.e. the subreg that covers all others. The covering subreg
    /// becomes the whole register after the replacement.
    unsigned SubReg = AMDGPU::NoSubRegister;
  };
  /// Map OldSubReg -> { RC, NewSubReg }. Used as in/out container.
  using SubRegMap = SmallDenseMap<unsigned, SubRegInfo>;

  /// Given register class RC and the set of used subregs as keys in the SubRegs
  /// map, return a new register class and the indexes of the right-shifted
  /// subregs as values in the SubRegs map, such that the resulting regclass
  /// contains registers of minimal size.
  const TargetRegisterClass *getMinSizeReg(const TargetRegisterClass *RC,
                                           SubRegMap &SubRegs) const;
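
  // Illustration (mirrors the example in the file header, assuming 32-bit
  // subregs): if only sub4..sub7 of a VReg_1024 register are used, SubRegs
  // comes in with keys {sub4, sub5, sub6, sub7} and getMinSizeReg is expected
  // to return VReg_128 with NewSubReg filled in as sub0..sub3 respectively.
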
  /// Given regclass RC and pairs of [OldSubReg, SubRegRC] in SubRegs try to
  /// find a new regclass such that:
  ///   1. It has subregs obtained by shifting each OldSubReg by RShift number
  ///      of bits to the right. Every "shifted" subreg should have the same
  ///      SubRegRC. If CoverSubregIdx is not zero it's a subreg that "covers"
  ///      all other subregs in pairs. Basically such subreg becomes a whole
  ///      register.
  ///   2. The resulting register class contains registers of minimal size but
  ///      not less than RegNumBits.
  ///
  /// SubRegs is a map of OldSubReg -> [SubRegRC, NewSubReg] and is used as an
  /// in/out parameter:
  ///   OldSubReg - input parameter,
  ///   SubRegRC  - input parameter (cannot be null),
  ///   NewSubReg - output, contains shifted subregs on return.
  const TargetRegisterClass *
  getRegClassWithShiftedSubregs(const TargetRegisterClass *RC, unsigned RShift,
                                unsigned RegNumBits, unsigned CoverSubregIdx,
                                SubRegMap &SubRegs) const;
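
  // Sketch of the intent (hypothetical values, not taken from a real run):
  // with RShift = 128 bits, OldSubReg = sub4 becomes NewSubReg = sub0 and
  // OldSubReg = sub5 becomes sub1; the returned class must support both
  // shifted subregs and have registers of at least RegNumBits bits, e.g.
  // VReg_128 when RegNumBits = 128.
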
  /// Update live intervals after rewriting OldReg to NewReg with the SubRegs
  /// map describing the OldSubReg -> NewSubReg mapping.
  void updateLiveIntervals(Register OldReg, Register NewReg,
                           SubRegMap &SubRegs) const;
  /// Return the reg class expected by a MO's parent instruction for a given MO.
  const TargetRegisterClass *getOperandRegClass(MachineOperand &MO) const;

  /// Find the version of SubReg right-shifted by RShift bits if it exists,
  /// return 0 otherwise.
  unsigned shiftSubReg(unsigned SubReg, unsigned RShift) const;
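
  // Example, assuming standard AMDGPU subreg indexes: shiftSubReg(sub4, 128)
  // yields sub0 because sub4 covers bits [128, 160) and shifting right by 128
  // bits leaves a 32-bit subreg at offset 0; if no subreg index exists at the
  // shifted position the result is 0.
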
  /// Find a subreg index with the given Offset and Size; return 0 if there is
  /// no such subregister index. The result is cached in the SubRegs
  /// data-member.
  unsigned getSubReg(unsigned Offset, unsigned Size) const;
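
  // Example, assuming standard AMDGPU subreg indexes: getSubReg(/*Offset=*/64,
  // /*Size=*/32) returns sub2 and getSubReg(/*Offset=*/64, /*Size=*/64)
  // returns sub2_sub3; a query with no matching index returns 0.
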
  /// Cache for getSubReg method: {Offset, Size} -> SubReg index.
  mutable SmallDenseMap<std::pair<unsigned, unsigned>, unsigned> SubRegs;

  /// Return a bit mask that contains all register classes that are projected
  /// into RC by SubRegIdx. The result is cached in the SuperRegMasks
  /// data-member.
  const uint32_t *getSuperRegClassMask(const TargetRegisterClass *RC,
                                       unsigned SubRegIdx) const;
  /// Cache for getSuperRegClassMask method: { RC, SubRegIdx } -> Class bitmask.
  mutable SmallDenseMap<std::pair<const TargetRegisterClass *, unsigned>,
                        const uint32_t *>
      SuperRegMasks;

  /// Return a bitmask containing all allocatable register classes with
  /// registers aligned at AlignNumBits. The result is cached in the
  /// AllocatableAndAlignedRegClassMasks data-member.
  const BitVector &
  getAllocatableAndAlignedRegClassMask(unsigned AlignNumBits) const;
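
  // Illustration (assumed class names): with AlignNumBits = 64 the mask keeps
  // even-aligned tuple classes such as VReg_64_Align2 and drops classes whose
  // registers are only 32-bit aligned.
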
  /// Cache for getAllocatableAndAlignedRegClassMask method:
  /// AlignNumBits -> Class bitmask.
  mutable SmallDenseMap<unsigned, BitVector> AllocatableAndAlignedRegClassMasks;
};

} // end anonymous namespace

// TODO: move this to the tablegen and use binary search by Offset.
unsigned GCNRewritePartialRegUses::getSubReg(unsigned Offset,
                                             unsigned Size) const {
  const auto [I, Inserted] = SubRegs.try_emplace({Offset, Size}, 0);
  if (Inserted) {
    for (unsigned Idx = 1, E = TRI->getNumSubRegIndices(); Idx < E; ++Idx) {
      if (TRI->getSubRegIdxOffset(Idx) == Offset &&
          TRI->getSubRegIdxSize(Idx) == Size) {
        I->second = Idx;
        break;
      }
    }
  }
  return I->second;
}
unsigned GCNRewritePartialRegUses::shiftSubReg(unsigned SubReg,
                                               unsigned RShift) const {
  unsigned Offset = TRI->getSubRegIdxOffset(SubReg) - RShift;
  return getSubReg(Offset, TRI->getSubRegIdxSize(SubReg));
}
const uint32_t *
GCNRewritePartialRegUses::getSuperRegClassMask(const TargetRegisterClass *RC,
                                               unsigned SubRegIdx) const {
  const auto [I, Inserted] =
      SuperRegMasks.try_emplace({RC, SubRegIdx}, nullptr);
  if (Inserted) {
    for (SuperRegClassIterator RCI(RC, TRI); RCI.isValid(); ++RCI) {
      if (RCI.getSubReg() == SubRegIdx) {
        I->second = RCI.getMask();
        break;
      }
    }
  }
  return I->second;
}
const BitVector &
GCNRewritePartialRegUses::getAllocatableAndAlignedRegClassMask(
    unsigned AlignNumBits) const {
  const auto [I, Inserted] =
      AllocatableAndAlignedRegClassMasks.try_emplace(AlignNumBits);
  BitVector &BV = I->second;
  if (Inserted) {
    BV.resize(TRI->getNumRegClasses());
    for (unsigned ClassID = 0; ClassID < TRI->getNumRegClasses(); ++ClassID) {
      auto *RC = TRI->getRegClass(ClassID);
      // Keep classes that are allocatable and whose registers satisfy the
      // requested alignment.
      if (RC->isAllocatable() &&
          TRI->getRegClassAlignmentNumBits(RC) % AlignNumBits == 0)
        BV.set(ClassID);
    }
  }
  return BV;
}
const TargetRegisterClass *
GCNRewritePartialRegUses::getRegClassWithShiftedSubregs(
    const TargetRegisterClass *RC, unsigned RShift, unsigned RegNumBits,
    unsigned CoverSubregIdx, SubRegMap &SubRegs) const {

  unsigned RCAlign = TRI->getRegClassAlignmentNumBits(RC);
  LLVM_DEBUG(dbgs() << "  Shift " << RShift << ", reg align " << RCAlign
                    << '\n');

  BitVector ClassMask(getAllocatableAndAlignedRegClassMask(RCAlign));
  for (auto &[OldSubReg, SRI] : SubRegs) {
    auto &[SubRegRC, NewSubReg] = SRI;
    assert(SubRegRC);

    LLVM_DEBUG(dbgs() << "  " << TRI->getSubRegIndexName(OldSubReg) << ':'
                      << TRI->getRegClassName(SubRegRC)
                      << (SubRegRC->isAllocatable() ? "" : " not alloc")
                      << " -> ");

    if (OldSubReg == CoverSubregIdx) {
      // The covering subreg will become a full register, RC should be
      // allocatable.
      assert(SubRegRC->isAllocatable());
      NewSubReg = AMDGPU::NoSubRegister;
      LLVM_DEBUG(dbgs() << "whole reg");
    } else {
      NewSubReg = shiftSubReg(OldSubReg, RShift);
      if (!NewSubReg) {
        LLVM_DEBUG(dbgs() << "none\n");
        return nullptr;
      }
      LLVM_DEBUG(dbgs() << TRI->getSubRegIndexName(NewSubReg));
    }

    const uint32_t *Mask = NewSubReg ? getSuperRegClassMask(SubRegRC, NewSubReg)
                                     : SubRegRC->getSubClassMask();
    if (!Mask)
      llvm_unreachable("no register class mask?");

    ClassMask.clearBitsNotInMask(Mask);
    // Don't try to early exit because checking if ClassMask has set bits isn't
    // that cheap and we expect it to pass in most cases.
    LLVM_DEBUG(dbgs() << ", num regclasses " << ClassMask.count() << '\n');
  }

  // ClassMask is the set of all register classes such that each class is
  // allocatable, aligned, has all shifted subregs and each subreg has the
  // required register class (see SubRegRC above). Now select the first (that
  // is, the largest) register class with registers of minimal size that is not
  // less than RegNumBits. We have to check the register size because we may
  // encounter classes of smaller registers like VReg_1 in some situations.
  const TargetRegisterClass *MinRC = nullptr;
  unsigned MinNumBits = std::numeric_limits<unsigned>::max();
  for (unsigned ClassID : ClassMask.set_bits()) {
    auto *RC = TRI->getRegClass(ClassID);
    unsigned NumBits = TRI->getRegSizeInBits(*RC);
    if (NumBits < MinNumBits && NumBits >= RegNumBits) {
      MinNumBits = NumBits;
      MinRC = RC;
    }
    if (MinNumBits == RegNumBits)
      break;
  }
#ifndef NDEBUG
  if (MinRC)
    for (auto [SubReg, SRI] : SubRegs)
      // Check that all registers in MinRC support SRI.SubReg subregister.
      assert(MinRC == TRI->getSubClassWithSubReg(MinRC, SRI.SubReg));
#endif
  // There might be zero RShift - in this case we are just trying to find a
  // smaller register.
  return (MinRC != RC || RShift != 0) ? MinRC : nullptr;
}
const TargetRegisterClass *
GCNRewritePartialRegUses::getMinSizeReg(const TargetRegisterClass *RC,
                                        SubRegMap &SubRegs) const {
  unsigned CoverSubreg = AMDGPU::NoSubRegister;
  unsigned Offset = std::numeric_limits<unsigned>::max();
  unsigned End = 0;
  for (auto [SubReg, SRI] : SubRegs) {
    unsigned SubRegOffset = TRI->getSubRegIdxOffset(SubReg);
    unsigned SubRegEnd = SubRegOffset + TRI->getSubRegIdxSize(SubReg);
    if (SubRegOffset < Offset) {
      Offset = SubRegOffset;
      CoverSubreg = AMDGPU::NoSubRegister;
    }
    if (SubRegEnd > End) {
      End = SubRegEnd;
      CoverSubreg = AMDGPU::NoSubRegister;
    }
    if (SubRegOffset == Offset && SubRegEnd == End)
      CoverSubreg = SubReg;
  }
  // If a covering subreg is found, shift everything so that the covering
  // subreg ends up in the rightmost position.
  if (CoverSubreg != AMDGPU::NoSubRegister)
    return getRegClassWithShiftedSubregs(RC, Offset, End - Offset, CoverSubreg,
                                         SubRegs);

  // Otherwise find the subreg with the maximum required alignment and shift it
  // and all other subregs to the rightmost possible position with respect to
  // the alignment.
  unsigned MaxAlign = 0;
  for (auto [SubReg, SRI] : SubRegs)
    MaxAlign = std::max(MaxAlign, TRI->getSubRegAlignmentNumBits(RC, SubReg));

  unsigned FirstMaxAlignedSubRegOffset = std::numeric_limits<unsigned>::max();
  for (auto [SubReg, SRI] : SubRegs) {
    if (TRI->getSubRegAlignmentNumBits(RC, SubReg) != MaxAlign)
      continue;
    FirstMaxAlignedSubRegOffset =
        std::min(FirstMaxAlignedSubRegOffset, TRI->getSubRegIdxOffset(SubReg));
    if (FirstMaxAlignedSubRegOffset == Offset)
      break;
  }

  unsigned NewOffsetOfMaxAlignedSubReg =
      alignTo(FirstMaxAlignedSubRegOffset - Offset, MaxAlign);

  if (NewOffsetOfMaxAlignedSubReg > FirstMaxAlignedSubRegOffset)
    llvm_unreachable("misaligned subreg");

  unsigned RShift = FirstMaxAlignedSubRegOffset - NewOffsetOfMaxAlignedSubReg;
  return getRegClassWithShiftedSubregs(RC, RShift, End - RShift, 0, SubRegs);
}
// Only the subranges' lanemasks of the original interval need to be modified.
// The subrange for a covering subreg becomes the main range.
void GCNRewritePartialRegUses::updateLiveIntervals(Register OldReg,
                                                   Register NewReg,
                                                   SubRegMap &SubRegs) const {
  if (!LIS->hasInterval(OldReg))
    return;

  auto &OldLI = LIS->getInterval(OldReg);
  auto &NewLI = LIS->createEmptyInterval(NewReg);

  auto &Allocator = LIS->getVNInfoAllocator();
  NewLI.setWeight(OldLI.weight());

  for (auto &SR : OldLI.subranges()) {
    auto I = find_if(SubRegs, [&](auto &P) {
      return SR.LaneMask == TRI->getSubRegIndexLaneMask(P.first);
    });

    if (I == SubRegs.end()) {
      // There might be a situation when subranges don't exactly match used
      // subregs, for example:
      // %120 [160r,1392r:0) 0@160r
      //   L000000000000C000 [160r,1392r:0) 0@160r
      //   L0000000000003000 [160r,1392r:0) 0@160r
      //   L0000000000000C00 [160r,1392r:0) 0@160r
      //   L0000000000000300 [160r,1392r:0) 0@160r
      //   L0000000000000003 [160r,1104r:0) 0@160r
      //   L000000000000000C [160r,1104r:0) 0@160r
      //   L0000000000000030 [160r,1104r:0) 0@160r
      //   L00000000000000C0 [160r,1104r:0) 0@160r
      // but used subregs are:
      //   sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7, L000000000000FFFF
      //   sub0_sub1_sub2_sub3, L00000000000000FF
      //   sub4_sub5_sub6_sub7, L000000000000FF00
      // In this example subregs sub0_sub1_sub2_sub3 and sub4_sub5_sub6_sub7
      // have several subranges with the same lifetime. For such cases just
      // recreate the interval.
      LIS->removeInterval(OldReg);
      LIS->removeInterval(NewReg);
      LIS->createAndComputeVirtRegInterval(NewReg);
      return;
    }

    if (unsigned NewSubReg = I->second.SubReg)
      NewLI.createSubRangeFrom(Allocator,
                               TRI->getSubRegIndexLaneMask(NewSubReg), SR);
    else // This is the covering subreg (0 index) - set it as main range.
      NewLI.assign(SR, Allocator);
  }

  if (NewLI.empty())
    NewLI.assign(OldLI, Allocator);

  LIS->removeInterval(OldReg);
}
const TargetRegisterClass *
GCNRewritePartialRegUses::getOperandRegClass(MachineOperand &MO) const {
  MachineInstr *MI = MO.getParent();
  return TII->getRegClass(TII->get(MI->getOpcode()), MI->getOperandNo(&MO), TRI,
                          *MI->getParent()->getParent());
}
bool GCNRewritePartialRegUses::rewriteReg(Register Reg) const {
  auto Range = MRI->reg_nodbg_operands(Reg);
  if (Range.empty() || any_of(Range, [](MachineOperand &MO) {
        return MO.getSubReg() == AMDGPU::NoSubRegister; // Whole reg used. [1]
      }))
    return false;

  auto *RC = MRI->getRegClass(Reg);
  LLVM_DEBUG(dbgs() << "Try to rewrite partial reg " << printReg(Reg, TRI)
                    << ':' << TRI->getRegClassName(RC) << '\n');

  // Collect used subregs and their reg classes inferred from instruction
  // operands.
  SubRegMap SubRegs;
  for (MachineOperand &MO : Range) {
    const unsigned SubReg = MO.getSubReg();
    assert(SubReg != AMDGPU::NoSubRegister); // Due to [1].
    LLVM_DEBUG(dbgs() << "  " << TRI->getSubRegIndexName(SubReg) << ':');

    auto *&SubRegRC = SubRegs[SubReg].RC;
    if (!SubRegRC)
      SubRegRC = TRI->getSubRegisterClass(RC, SubReg);

    if (SubRegRC) {
      if (auto *OpDescRC = getOperandRegClass(MO)) {
        LLVM_DEBUG(dbgs() << TRI->getRegClassName(SubRegRC) << " & "
                          << TRI->getRegClassName(OpDescRC) << " = ");
        SubRegRC = TRI->getCommonSubClass(SubRegRC, OpDescRC);
      }
    }

    if (!SubRegRC) {
      LLVM_DEBUG(dbgs() << "couldn't find target regclass\n");
      return false;
    }
    LLVM_DEBUG(dbgs() << TRI->getRegClassName(SubRegRC) << '\n');
  }

  auto *NewRC = getMinSizeReg(RC, SubRegs);
  if (!NewRC) {
    LLVM_DEBUG(dbgs() << "  No improvement achieved\n");
    return false;
  }

  Register NewReg = MRI->createVirtualRegister(NewRC);
  LLVM_DEBUG(dbgs() << "  Success " << printReg(Reg, TRI) << ':'
                    << TRI->getRegClassName(RC) << " -> "
                    << printReg(NewReg, TRI) << ':'
                    << TRI->getRegClassName(NewRC) << '\n');

  for (auto &MO : make_early_inc_range(MRI->reg_operands(Reg))) {
    MO.setReg(NewReg);
    // Debug info can refer to the whole reg, just leave it as it is for now.
    // TODO: create some DI shift expression?
    if (MO.isDebug() && MO.getSubReg() == 0)
      continue;
    unsigned SubReg = SubRegs[MO.getSubReg()].SubReg;
    MO.setSubReg(SubReg);
    if (SubReg == AMDGPU::NoSubRegister && MO.isDef())
      MO.setIsUndef(false);
  }

  if (LIS)
    updateLiveIntervals(Reg, NewReg, SubRegs);

  return true;
}
bool GCNRewritePartialRegUses::runOnMachineFunction(MachineFunction &MF) {
  MRI = &MF.getRegInfo();
  TRI = static_cast<const SIRegisterInfo *>(MRI->getTargetRegisterInfo());
  TII = MF.getSubtarget().getInstrInfo();
  auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
  LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr;
  bool Changed = false;
  for (size_t I = 0, E = MRI->getNumVirtRegs(); I < E; ++I) {
    Changed |= rewriteReg(Register::index2VirtReg(I));
  }
  return Changed;
}
char GCNRewritePartialRegUses::ID;

char &llvm::GCNRewritePartialRegUsesID = GCNRewritePartialRegUses::ID;

INITIALIZE_PASS_BEGIN(GCNRewritePartialRegUses, DEBUG_TYPE,
                      "Rewrite Partial Register Uses", false, false)
INITIALIZE_PASS_END(GCNRewritePartialRegUses, DEBUG_TYPE,
                    "Rewrite Partial Register Uses", false, false)