Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit60d00ec

Browse files
authored
LSRA-throughput: Iterate over the regMaskTP instead of all registers (#87424)
* replace for-loop with regMaskTP iterator
* jit format
* REVERT
* fix a bug
* address review feedback
* Add genFirstRegNumFromMaskAndToggle and genFirstRegNumFromMask
* Use actualRegistersMask
* jit format
* review feedback
* Inline BitScanForward
* fix build error
* remove commented code
1 parent feff67d commit 60d00ec

File tree

8 files changed

+236
-161
lines changed

8 files changed

+236
-161
lines changed

‎src/coreclr/jit/codegenarm64.cpp‎

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -708,16 +708,13 @@ void CodeGen::genBuildRegPairsStack(regMaskTP regsMask, ArrayStack<RegPair>* reg
708708

709709
while (regsMask != RBM_NONE)
710710
{
711-
regMaskTP reg1Mask = genFindLowestBit(regsMask);
712-
regNumber reg1 = genRegNumFromMask(reg1Mask);
713-
regsMask &= ~reg1Mask;
711+
regNumber reg1 = genFirstRegNumFromMaskAndToggle(regsMask);
714712
regsCount -= 1;
715713

716714
bool isPairSave = false;
717715
if (regsCount > 0)
718716
{
719-
regMaskTP reg2Mask = genFindLowestBit(regsMask);
720-
regNumber reg2 = genRegNumFromMask(reg2Mask);
717+
regNumber reg2 = genFirstRegNumFromMask(regsMask);
721718
if (reg2 == REG_NEXT(reg1))
722719
{
723720
// The JIT doesn't allow saving pair (R28,FP), even though the
@@ -733,7 +730,7 @@ void CodeGen::genBuildRegPairsStack(regMaskTP regsMask, ArrayStack<RegPair>* reg
733730
{
734731
isPairSave = true;
735732

736-
regsMask&= ~reg2Mask;
733+
regsMask^= genRegMask(reg2);
737734
regsCount -= 1;
738735

739736
regStack->Push(RegPair(reg1, reg2));

‎src/coreclr/jit/compiler.hpp‎

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -706,6 +706,50 @@ inline regNumber genRegNumFromMask(regMaskTP mask)
706706
return regNum;
707707
}
708708

709+
//------------------------------------------------------------------------------
710+
// genFirstRegNumFromMaskAndToggle : Maps first bit set in the register mask to a
711+
// register number and also toggles that bit in the `mask`.
712+
// Arguments:
713+
// mask - the register mask
714+
//
715+
// Return Value:
716+
// The number of the first register contained in the mask; the `mask` is also updated to toggle
717+
// the bit.
718+
//
719+
720+
inline regNumbergenFirstRegNumFromMaskAndToggle(regMaskTP& mask)
721+
{
722+
assert(mask !=0);// Must have one bit set, so can't have a mask of zero
723+
724+
/* Convert the mask to a register number*/
725+
726+
regNumber regNum = (regNumber)BitOperations::BitScanForward(mask);
727+
mask ^=genRegMask(regNum);
728+
729+
return regNum;
730+
}
731+
732+
//------------------------------------------------------------------------------
733+
// genFirstRegNumFromMask : Maps first bit set in the register mask to a register number.
734+
//
735+
// Arguments:
736+
// mask - the register mask
737+
//
738+
// Return Value:
739+
// The number of the first register contained in the mask.
740+
//
741+
742+
inline regNumbergenFirstRegNumFromMask(regMaskTP mask)
743+
{
744+
assert(mask !=0);// Must have one bit set, so can't have a mask of zero
745+
746+
/* Convert the mask to a register number*/
747+
748+
regNumber regNum = (regNumber)BitOperations::BitScanForward(mask);
749+
750+
return regNum;
751+
}
752+
709753
/*****************************************************************************
710754
*
711755
* Return the size in bytes of the given type.

‎src/coreclr/jit/gentree.cpp‎

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25840,9 +25840,9 @@ regNumber GenTree::ExtractTempReg(regMaskTP mask /* = (regMaskTP)-1 */)
2584025840
{
2584125841
regMaskTP availableSet = gtRsvdRegs & mask;
2584225842
assert(genCountBits(availableSet) >= 1);
25843-
regMaskTP tempRegMask =genFindLowestBit(availableSet);
25844-
gtRsvdRegs&= ~tempRegMask;
25845-
returngenRegNumFromMask(tempRegMask);
25843+
regNumber tempReg =genFirstRegNumFromMask(availableSet);
25844+
gtRsvdRegs^= genRegMask(tempReg);
25845+
returntempReg;
2584625846
}
2584725847

2584825848
//------------------------------------------------------------------------

‎src/coreclr/jit/lsra.cpp‎

Lines changed: 93 additions & 62 deletions
Large diffs are not rendered by default.

‎src/coreclr/jit/lsra.h‎

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1661,8 +1661,8 @@ class LinearScan : public LinearScanInterface
16611661
VarToRegMap* outVarToRegMaps;
16621662

16631663
// A temporary VarToRegMap used during the resolution of critical edges.
1664-
VarToRegMap sharedCriticalVarToRegMap;
1665-
1664+
VarToRegMapsharedCriticalVarToRegMap;
1665+
PhasedVar<regMaskTP> actualRegistersMask;
16661666
PhasedVar<regMaskTP> availableIntRegs;
16671667
PhasedVar<regMaskTP> availableFloatRegs;
16681668
PhasedVar<regMaskTP> availableDoubleRegs;

‎src/coreclr/jit/lsrabuild.cpp‎

Lines changed: 33 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -727,33 +727,30 @@ bool LinearScan::isContainableMemoryOp(GenTree* node)
727727
//
728728
voidLinearScan::addRefsForPhysRegMask(regMaskTP mask, LsraLocation currentLoc, RefType refType,bool isLastUse)
729729
{
730-
if (refType == RefTypeKill)
731-
{
732-
// The mask identifies a set of registers that will be used during
733-
// codegen. Mark these as modified here, so when we do final frame
734-
// layout, we'll know about all these registers. This is especially
735-
// important if mask contains callee-saved registers, which affect the
736-
// frame size since we need to save/restore them. In the case where we
737-
// have a copyBlk with GC pointers, can need to call the
738-
// CORINFO_HELP_ASSIGN_BYREF helper, which kills callee-saved RSI and
739-
// RDI, if LSRA doesn't assign RSI/RDI, they wouldn't get marked as
740-
// modified until codegen, which is too late.
741-
compiler->codeGen->regSet.rsSetRegsModified(maskDEBUGARG(true));
742-
}
743-
744-
for (regNumber reg = REG_FIRST; mask; reg =REG_NEXT(reg), mask >>=1)
745-
{
746-
if (mask &1)
730+
assert(refType == RefTypeKill);
731+
732+
// The mask identifies a set of registers that will be used during
733+
// codegen. Mark these as modified here, so when we do final frame
734+
// layout, we'll know about all these registers. This is especially
735+
// important if mask contains callee-saved registers, which affect the
736+
// frame size since we need to save/restore them. In the case where we
737+
// have a copyBlk with GC pointers, can need to call the
738+
// CORINFO_HELP_ASSIGN_BYREF helper, which kills callee-saved RSI and
739+
// RDI, if LSRA doesn't assign RSI/RDI, they wouldn't get marked as
740+
// modified until codegen, which is too late.
741+
compiler->codeGen->regSet.rsSetRegsModified(maskDEBUGARG(true));
742+
743+
for (regMaskTP candidates = mask; candidates != RBM_NONE;)
744+
{
745+
regNumber reg =genFirstRegNumFromMaskAndToggle(candidates);
746+
// This assumes that these are all "special" RefTypes that
747+
// don't need to be recorded on the tree (hence treeNode is nullptr)
748+
RefPosition* pos =newRefPosition(reg, currentLoc, refType,nullptr,
749+
genRegMask(reg));// This MUST occupy the physical register (obviously)
750+
751+
if (isLastUse)
747752
{
748-
// This assumes that these are all "special" RefTypes that
749-
// don't need to be recorded on the tree (hence treeNode is nullptr)
750-
RefPosition* pos =newRefPosition(reg, currentLoc, refType,nullptr,
751-
genRegMask(reg));// This MUST occupy the physical register (obviously)
752-
753-
if (isLastUse)
754-
{
755-
pos->lastUse =true;
756-
}
753+
pos->lastUse =true;
757754
}
758755
}
759756
}
@@ -2756,6 +2753,16 @@ void LinearScan::buildIntervals()
27562753
availableRegCount = REG_INT_COUNT;
27572754
}
27582755

2756+
if (availableRegCount < (sizeof(regMaskTP) *8))
2757+
{
2758+
// Keep only the low `availableRegCount` bits, masking out bits availableRegCount through 63
2759+
actualRegistersMask = (1ULL << availableRegCount) -1;
2760+
}
2761+
else
2762+
{
2763+
actualRegistersMask = ~RBM_NONE;
2764+
}
2765+
27592766
#ifdef DEBUG
27602767
// Make sure we don't have any blocks that were not visited
27612768
for (BasicBlock*const block : compiler->Blocks())

‎src/coreclr/jit/utils.cpp‎

Lines changed: 0 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -2556,66 +2556,6 @@ double FloatingPointUtils::normalize(double value)
25562556
#endif
25572557
}
25582558

2559-
//------------------------------------------------------------------------
2560-
// BitOperations::BitScanForward: Search the mask data from least significant bit (LSB) to the most significant bit
2561-
// (MSB) for a set bit (1)
2562-
//
2563-
// Arguments:
2564-
// value - the value
2565-
//
2566-
// Return Value:
2567-
// 0 if the mask is zero; nonzero otherwise.
2568-
//
2569-
uint32_tBitOperations::BitScanForward(uint32_t value)
2570-
{
2571-
assert(value !=0);
2572-
2573-
#if defined(_MSC_VER)
2574-
unsignedlong result;
2575-
::_BitScanForward(&result, value);
2576-
returnstatic_cast<uint32_t>(result);
2577-
#else
2578-
int32_t result =__builtin_ctz(value);
2579-
returnstatic_cast<uint32_t>(result);
2580-
#endif
2581-
}
2582-
2583-
//------------------------------------------------------------------------
2584-
// BitOperations::BitScanForward: Search the mask data from least significant bit (LSB) to the most significant bit
2585-
// (MSB) for a set bit (1)
2586-
//
2587-
// Arguments:
2588-
// value - the value
2589-
//
2590-
// Return Value:
2591-
// 0 if the mask is zero; nonzero otherwise.
2592-
//
2593-
uint32_tBitOperations::BitScanForward(uint64_t value)
2594-
{
2595-
assert(value !=0);
2596-
2597-
#if defined(_MSC_VER)
2598-
#if defined(HOST_64BIT)
2599-
unsignedlong result;
2600-
::_BitScanForward64(&result, value);
2601-
returnstatic_cast<uint32_t>(result);
2602-
#else
2603-
uint32_t lower =static_cast<uint32_t>(value);
2604-
2605-
if (lower ==0)
2606-
{
2607-
uint32_t upper =static_cast<uint32_t>(value >>32);
2608-
return32 +BitScanForward(upper);
2609-
}
2610-
2611-
returnBitScanForward(lower);
2612-
#endif// HOST_64BIT
2613-
#else
2614-
int32_t result =__builtin_ctzll(value);
2615-
returnstatic_cast<uint32_t>(result);
2616-
#endif
2617-
}
2618-
26192559
//------------------------------------------------------------------------
26202560
// BitOperations::BitScanReverse: Search the mask data from most significant bit (MSB) to least significant bit
26212561
// (LSB) for a set bit (1).

‎src/coreclr/jit/utils.h‎

Lines changed: 58 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -775,9 +775,65 @@ class FloatingPointUtils
775775
classBitOperations
776776
{
777777
public:
778-
staticuint32_tBitScanForward(uint32_t value);
778+
//------------------------------------------------------------------------
779+
// BitOperations::BitScanForward: Search the mask data from least significant bit (LSB) to the most significant bit
780+
// (MSB) for a set bit (1)
781+
//
782+
// Arguments:
783+
// value - the value
784+
//
785+
// Return Value:
786+
// The zero-based index of the least significant set bit; `value` must be nonzero.
787+
//
788+
FORCEINLINEstaticuint32_tBitScanForward(uint32_t value)
789+
{
790+
assert(value !=0);
791+
792+
#if defined(_MSC_VER)
793+
unsignedlong result;
794+
::_BitScanForward(&result, value);
795+
returnstatic_cast<uint32_t>(result);
796+
#else
797+
int32_t result =__builtin_ctz(value);
798+
returnstatic_cast<uint32_t>(result);
799+
#endif
800+
}
801+
802+
//------------------------------------------------------------------------
803+
// BitOperations::BitScanForward: Search the mask data from least significant bit (LSB) to the most significant bit
804+
// (MSB) for a set bit (1)
805+
//
806+
// Arguments:
807+
// value - the value
808+
//
809+
// Return Value:
810+
// The zero-based index of the least significant set bit; `value` must be nonzero.
811+
//
812+
FORCEINLINEstaticuint32_tBitScanForward(uint64_t value)
813+
{
814+
assert(value !=0);
815+
816+
#if defined(_MSC_VER)
817+
#if defined(HOST_64BIT)
818+
unsignedlong result;
819+
::_BitScanForward64(&result, value);
820+
returnstatic_cast<uint32_t>(result);
821+
#else
822+
uint32_t lower =static_cast<uint32_t>(value);
779823

780-
staticuint32_tBitScanForward(uint64_t value);
824+
if (lower ==0)
825+
{
826+
uint32_t upper =static_cast<uint32_t>(value >>32);
827+
return32 +BitScanForward(upper);
828+
}
829+
830+
returnBitScanForward(lower);
831+
#endif// HOST_64BIT
832+
#else
833+
int32_t result =__builtin_ctzll(value);
834+
returnstatic_cast<uint32_t>(result);
835+
#endif
836+
}
781837

782838
staticuint32_tBitScanReverse(uint32_t value);
783839

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp