Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 6b25c3b

Browse files
EgorBo and jakobbotsch
authored
JIT: Merge consecutive stores (#92852)
Co-authored-by: Egor <egorbo@Egors-MacBook-Pro.local>
Co-authored-by: Jakob Botsch Nielsen <Jakob.botsch.nielsen@gmail.com>
1 parent 7abea9e · commit 6b25c3b

File tree

2 files changed

+281
-0
lines changed

2 files changed

+281
-0
lines changed

‎src/coreclr/jit/lower.cpp‎

Lines changed: 280 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7802,6 +7802,285 @@ void Lowering::ContainCheckBitCast(GenTree* node)
78027802
}
78037803
}
78047804

7805+
structStoreCoalescingData
7806+
{
7807+
var_types targetType;
7808+
GenTree* baseAddr;
7809+
GenTree* index;
7810+
GenTree* value;
7811+
uint32_t scale;
7812+
int offset;
7813+
};
7814+
7815+
//------------------------------------------------------------------------
7816+
// GetStoreCoalescingData: given a STOREIND node, get the data needed to perform
7817+
// store coalescing including pointer to the previous node.
7818+
//
7819+
// Arguments:
7820+
// comp - the compiler instance
7821+
// ind - the STOREIND node
7822+
// data - [OUT] the data needed for store coalescing
7823+
//
7824+
// Return Value:
7825+
// true if the data was successfully retrieved, false otherwise.
7826+
// Basically, false means that we definitely can't do store coalescing.
7827+
//
7828+
staticboolGetStoreCoalescingData(Compiler* comp, GenTreeStoreInd* ind, StoreCoalescingData* data)
7829+
{
7830+
// Don't merge volatile stores.
7831+
if (ind->IsVolatile())
7832+
{
7833+
returnfalse;
7834+
}
7835+
7836+
// Data has to be INT_CNS, can be also VEC_CNS in future.
7837+
if (!ind->Data()->IsCnsIntOrI())
7838+
{
7839+
returnfalse;
7840+
}
7841+
7842+
data->targetType = ind->TypeGet();
7843+
data->value = ind->Data();
7844+
if (ind->Addr()->OperIs(GT_LEA))
7845+
{
7846+
GenTree* base = ind->Addr()->AsAddrMode()->Base();
7847+
GenTree* index = ind->Addr()->AsAddrMode()->Index();
7848+
if ((base ==nullptr) || !base->OperIs(GT_LCL_VAR) || comp->lvaVarAddrExposed(base->AsLclVar()->GetLclNum()))
7849+
{
7850+
// Base must be a local. It's possible for it to be nullptr when index is not null,
7851+
// but let's ignore such cases.
7852+
returnfalse;
7853+
}
7854+
7855+
if ((index !=nullptr) &&
7856+
(!index->OperIs(GT_LCL_VAR) || comp->lvaVarAddrExposed(index->AsLclVar()->GetLclNum())))
7857+
{
7858+
// Index should be either nullptr or a local.
7859+
returnfalse;
7860+
}
7861+
7862+
data->baseAddr = base ==nullptr ?nullptr : base;
7863+
data->index = index ==nullptr ?nullptr : index;
7864+
data->scale = ind->Addr()->AsAddrMode()->GetScale();
7865+
data->offset = ind->Addr()->AsAddrMode()->Offset();
7866+
}
7867+
elseif (ind->Addr()->OperIs(GT_LCL_VAR) && !comp->lvaVarAddrExposed(ind->Addr()->AsLclVar()->GetLclNum()))
7868+
{
7869+
// Address is just a local, no offset, scale is 1
7870+
data->baseAddr = ind->Addr();
7871+
data->index =nullptr;
7872+
data->scale =1;
7873+
data->offset =0;
7874+
}
7875+
else
7876+
{
7877+
// Address is not LEA or local.
7878+
returnfalse;
7879+
}
7880+
returntrue;
7881+
}
7882+
7883+
//------------------------------------------------------------------------
7884+
// LowerStoreIndirCoalescing: If the given STOREIND node is followed by a similar
7885+
// STOREIND node, try to merge them into a single store of a twice wider type. Example:
7886+
//
7887+
// * STOREIND int
7888+
// +--* LCL_VAR byref V00
7889+
// \--* CNS_INT int 0x1
7890+
//
7891+
// * STOREIND int
7892+
// +--* LEA(b+4) byref
7893+
// | \--* LCL_VAR byref V00
7894+
// \--* CNS_INT int 0x2
7895+
//
7896+
// We can merge these two into into a single store of 8 bytes with (0x1 | (0x2 << 32)) as the value
7897+
//
7898+
// * STOREIND long
7899+
// +--* LEA(b+0) byref
7900+
// | \--* LCL_VAR byref V00
7901+
// \--* CNS_INT long 0x200000001
7902+
//
7903+
// Arguments:
7904+
// ind - the current STOREIND node
7905+
//
7906+
voidLowering::LowerStoreIndirCoalescing(GenTreeStoreInd* ind)
7907+
{
7908+
// LA, RISC-V and ARM32 more likely to recieve a terrible performance hit from
7909+
// unaligned accesses making this optimization questionable.
7910+
#if defined(TARGET_XARCH) || defined(TARGET_ARM64)
7911+
if (!comp->opts.OptimizationEnabled())
7912+
{
7913+
return;
7914+
}
7915+
7916+
// For now, we require the current STOREIND to have LEA (previous store may not have it)
7917+
// So we can easily adjust the offset, consider making it more flexible in future.
7918+
if (!ind->Addr()->OperIs(GT_LEA))
7919+
{
7920+
return;
7921+
}
7922+
7923+
// We're going to do it in a loop while we see suitable STOREINDs to coalesce.
7924+
// E.g.: we have the following LIR sequence:
7925+
//
7926+
// ...addr nodes...
7927+
// STOREIND(int)
7928+
// ...addr nodes...
7929+
// STOREIND(short)
7930+
// ...addr nodes...
7931+
// STOREIND(short) <-- we're here
7932+
//
7933+
// First we merge two 'short' stores, then we merge the result with the 'int' store
7934+
// to get a single store of 8 bytes.
7935+
do
7936+
{
7937+
// This check is not really needed, just for better throughput.
7938+
if (!ind->TypeIs(TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT))
7939+
{
7940+
return;
7941+
}
7942+
7943+
StoreCoalescingData currData;
7944+
StoreCoalescingData prevData;
7945+
7946+
// Get coalescing data for the current STOREIND
7947+
if (!GetStoreCoalescingData(comp, ind, &currData))
7948+
{
7949+
return;
7950+
}
7951+
7952+
bool isClosedRange =false;
7953+
// Now we need to find the very first LIR node representing the current STOREIND
7954+
// and make sure that there are no other unexpected nodes in-between.
7955+
LIR::ReadOnlyRange currIndRange =BlockRange().GetTreeRange(ind, &isClosedRange);
7956+
if (!isClosedRange)
7957+
{
7958+
return;
7959+
}
7960+
GenTree* prevTree = currIndRange.FirstNode()->gtPrev;
7961+
// Now we need to find the previous STOREIND,
7962+
// we can ignore any NOPs or IL_OFFSETs in-between
7963+
while ((prevTree !=nullptr) && prevTree->OperIs(GT_NOP, GT_IL_OFFSET))
7964+
{
7965+
prevTree = prevTree->gtPrev;
7966+
}
7967+
7968+
// It's not a STOREIND - bail out.
7969+
if ((prevTree ==nullptr) || !prevTree->OperIs(GT_STOREIND))
7970+
{
7971+
return;
7972+
}
7973+
7974+
// Get coalescing data for the previous STOREIND
7975+
GenTreeStoreInd* prevInd = prevTree->AsStoreInd();
7976+
if (!GetStoreCoalescingData(comp, prevInd->AsStoreInd(), &prevData))
7977+
{
7978+
return;
7979+
}
7980+
7981+
// Same for the previous STOREIND, make sure there are no unexpected nodes around.
7982+
LIR::ReadOnlyRange prevIndRange =BlockRange().GetTreeRange(prevInd, &isClosedRange);
7983+
if (!isClosedRange)
7984+
{
7985+
return;
7986+
}
7987+
7988+
// STOREIND aren't value nodes.
7989+
LIR::Use use;
7990+
assert(!BlockRange().TryGetUse(prevInd, &use) && !BlockRange().TryGetUse(ind, &use));
7991+
7992+
// BaseAddr, Index, Scale and Type all have to match.
7993+
if ((prevData.scale != currData.scale) || (prevData.targetType != currData.targetType) ||
7994+
!GenTree::Compare(prevData.baseAddr, currData.baseAddr) ||
7995+
!GenTree::Compare(prevData.index, currData.index))
7996+
{
7997+
return;
7998+
}
7999+
8000+
// Offset has to match the size of the type. We don't support the same or overlapping offsets.
8001+
if (abs(prevData.offset - currData.offset) != (int)genTypeSize(prevData.targetType))
8002+
{
8003+
return;
8004+
}
8005+
8006+
// Since we're merging two stores of the same type, the new type is twice wider.
8007+
var_types oldType = ind->TypeGet();
8008+
var_types newType;
8009+
switch (oldType)
8010+
{
8011+
case TYP_BYTE:
8012+
case TYP_UBYTE:
8013+
newType = TYP_USHORT;
8014+
break;
8015+
8016+
case TYP_SHORT:
8017+
case TYP_USHORT:
8018+
newType = TYP_INT;// TYP_UINT is not legal in IR
8019+
break;
8020+
8021+
#ifdef TARGET_64BIT
8022+
case TYP_INT:
8023+
newType = TYP_LONG;
8024+
break;
8025+
#endif// TARGET_64BIT
8026+
8027+
// TYP_FLOAT and TYP_DOUBLE aren't needed here - they're expected to
8028+
// be converted to TYP_INT/TYP_LONG for constant value.
8029+
//
8030+
// TODO-CQ:
8031+
// 2 x LONG/REF -> SIMD16
8032+
// 2 x SIMD16 -> SIMD32
8033+
// 2 x SIMD32 -> SIMD64
8034+
//
8035+
// where it's legal (e.g. SIMD is not atomic on x64)
8036+
//
8037+
default:
8038+
return;
8039+
}
8040+
8041+
// Delete previous STOREIND entirely
8042+
BlockRange().Remove(std::move(prevIndRange));
8043+
8044+
// We know it's always LEA for now
8045+
GenTreeAddrMode* addr = ind->Addr()->AsAddrMode();
8046+
8047+
// Update offset to be the minimum of the two
8048+
addr->SetOffset(min(prevData.offset, currData.offset));
8049+
8050+
// Update type for both STOREIND and val
8051+
ind->gtType = newType;
8052+
ind->Data()->gtType = newType;
8053+
8054+
// We currently only support these constants for val
8055+
assert(prevData.value->IsCnsIntOrI() && currData.value->IsCnsIntOrI());
8056+
8057+
size_t lowerCns = (size_t)prevData.value->AsIntCon()->IconValue();
8058+
size_t upperCns = (size_t)currData.value->AsIntCon()->IconValue();
8059+
8060+
// if the previous store was at a higher address, swap the constants
8061+
if (prevData.offset > currData.offset)
8062+
{
8063+
std::swap(lowerCns, upperCns);
8064+
}
8065+
8066+
// Trim the constants to the size of the type, e.g. for TYP_SHORT and TYP_USHORT
8067+
// the mask will be 0xFFFF, for TYP_INT - 0xFFFFFFFF.
8068+
size_t mask = ~(size_t(0)) >> (sizeof(size_t) -genTypeSize(oldType)) * BITS_IN_BYTE;
8069+
lowerCns &= mask;
8070+
upperCns &= mask;
8071+
8072+
size_t val = (lowerCns | (upperCns << (genTypeSize(oldType) * BITS_IN_BYTE)));
8073+
JITDUMP("Coalesced two stores into a single store with value %lld\n", (int64_t)val);
8074+
8075+
// It's not expected to be contained yet, but just in case...
8076+
ind->Data()->ClearContained();
8077+
ind->Data()->AsIntCon()->gtIconVal = (ssize_t)val;
8078+
ind->gtFlags |= GTF_IND_UNALIGNED;
8079+
8080+
}while (true);
8081+
#endif// TARGET_XARCH || TARGET_ARM64
8082+
}
8083+
78058084
//------------------------------------------------------------------------
78068085
// LowerStoreIndirCommon: a common logic to lower StoreIndir.
78078086
//
@@ -7842,6 +8121,7 @@ void Lowering::LowerStoreIndirCommon(GenTreeStoreInd* ind)
78428121
}
78438122
#endif
78448123

8124+
LowerStoreIndirCoalescing(ind);
78458125
LowerStoreIndir(ind);
78468126
}
78478127
}

‎src/coreclr/jit/lower.h‎

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -312,6 +312,7 @@ class Lowering final : public Phase
312312
voidLowerStoreIndirCommon(GenTreeStoreInd* ind);
313313
voidLowerIndir(GenTreeIndir* ind);
314314
voidLowerStoreIndir(GenTreeStoreInd* node);
315+
voidLowerStoreIndirCoalescing(GenTreeStoreInd* node);
315316
GenTree*LowerAdd(GenTreeOp* node);
316317
GenTree*LowerMul(GenTreeOp* mul);
317318
boolTryLowerAndNegativeOne(GenTreeOp* node, GenTree** nextNode);

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp