Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

JIT: Merge stores#92852

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub?Sign in to your account

Merged
EgorBo merged 18 commits into dotnet:main from EgorBo:merge-stores
Oct 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit. Hold shift + click to select a range.
50d2929
STOREIND Coalescing
EgorBoSep 30, 2023
eedb979
Remove short temporarily
EgorBoSep 30, 2023
bfe1765
Check that both indirs are unused
EgorBoSep 30, 2023
6f2838e
Enable short
EgorBoSep 30, 2023
532266b
Enable BYTE
EgorBoSep 30, 2023
033a168
Update lower.cpp
EgorBoOct 1, 2023
9b17c32
call multiple times
Oct 1, 2023
48a8f85
Clean up, disable for some platforms due to unaligned reads
EgorBoOct 1, 2023
ed568a6
Clean up
EgorBoOct 1, 2023
4fbbeef
Clean up
EgorBoOct 1, 2023
c50200b
Simplify
EgorBoOct 1, 2023
b9c3f6c
Apply suggestions from code review
EgorBoOct 2, 2023
7975250
Merge branch 'main' of github.com:dotnet/runtime into merge-stores
EgorBoOct 2, 2023
3cc4585
Address feedback
EgorBoOct 2, 2023
f0dd22a
Address feedback
EgorBoOct 2, 2023
5c954fa
Clean up
EgorBoOct 2, 2023
62f9bed
Address feedback
EgorBoOct 2, 2023
b2e4b1a
Update src/coreclr/jit/lower.cpp
EgorBoOct 3, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
280 changes: 280 additions & 0 deletions — src/coreclr/jit/lower.cpp
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -7802,6 +7802,285 @@ void Lowering::ContainCheckBitCast(GenTree* node)
}
}

// StoreCoalescingData: summary of a STOREIND collected by GetStoreCoalescingData,
// describing the store as "*(baseAddr + index * scale + offset) = value".
// Two stores are coalescible when everything except 'offset' and 'value' matches.
struct StoreCoalescingData
{
    var_types targetType; // type (and thus width) of the stored value
    GenTree*  baseAddr;   // base address: a non-address-exposed LCL_VAR
    GenTree*  index;      // optional index: a non-address-exposed LCL_VAR, or nullptr
    GenTree*  value;      // stored value; currently always an integer constant (IsCnsIntOrI)
    uint32_t  scale;      // scale applied to index; 1 when the address is a plain local
    int       offset;     // constant byte offset from the base address
};

//------------------------------------------------------------------------
// GetStoreCoalescingData: given a STOREIND node, get the data needed to perform
//    store coalescing including pointer to the previous node.
//
// Arguments:
//    comp - the compiler instance
//    ind  - the STOREIND node
//    data - [OUT] the data needed for store coalescing
//
// Return Value:
//    true if the data was successfully retrieved, false otherwise.
//    Basically, false means that we definitely can't do store coalescing.
//
static bool GetStoreCoalescingData(Compiler* comp, GenTreeStoreInd* ind, StoreCoalescingData* data)
{
    // Don't merge volatile stores.
    if (ind->IsVolatile())
    {
        return false;
    }

    // Data has to be INT_CNS, can be also VEC_CNS in future.
    if (!ind->Data()->IsCnsIntOrI())
    {
        return false;
    }

    data->targetType = ind->TypeGet();
    data->value      = ind->Data();
    if (ind->Addr()->OperIs(GT_LEA))
    {
        GenTree* base  = ind->Addr()->AsAddrMode()->Base();
        GenTree* index = ind->Addr()->AsAddrMode()->Index();
        if ((base == nullptr) || !base->OperIs(GT_LCL_VAR) || comp->lvaVarAddrExposed(base->AsLclVar()->GetLclNum()))
        {
            // Base must be a local. It's possible for it to be nullptr when index is not null,
            // but let's ignore such cases.
            return false;
        }

        if ((index != nullptr) &&
            (!index->OperIs(GT_LCL_VAR) || comp->lvaVarAddrExposed(index->AsLclVar()->GetLclNum())))
        {
            // Index should be either nullptr or a local.
            return false;
        }

        // base is guaranteed non-null by the check above; index may legitimately be null.
        data->baseAddr = base;
        data->index    = index;
        data->scale    = ind->Addr()->AsAddrMode()->GetScale();
        data->offset   = ind->Addr()->AsAddrMode()->Offset();
    }
    else if (ind->Addr()->OperIs(GT_LCL_VAR) && !comp->lvaVarAddrExposed(ind->Addr()->AsLclVar()->GetLclNum()))
    {
        // Address is just a local, no offset, scale is 1
        data->baseAddr = ind->Addr();
        data->index    = nullptr;
        data->scale    = 1;
        data->offset   = 0;
    }
    else
    {
        // Address is not LEA or local.
        return false;
    }
    return true;
}

//------------------------------------------------------------------------
// LowerStoreIndirCoalescing: If the given STOREIND node is followed by a similar
//    STOREIND node, try to merge them into a single store of a twice wider type. Example:
//
//    *  STOREIND  int
//    +--*  LCL_VAR  byref  V00
//    \--*  CNS_INT  int    0x1
//
//    *  STOREIND  int
//    +--*  LEA(b+4)  byref
//    |  \--*  LCL_VAR  byref  V00
//    \--*  CNS_INT  int    0x2
//
// We can merge these two into a single store of 8 bytes with (0x1 | (0x2 << 32)) as the value
//
//    *  STOREIND  long
//    +--*  LEA(b+0)  byref
//    |  \--*  LCL_VAR  byref  V00
//    \--*  CNS_INT  long  0x200000001
//
// Arguments:
//    ind - the current STOREIND node
//
void Lowering::LowerStoreIndirCoalescing(GenTreeStoreInd* ind)
{
// LA, RISC-V and ARM32 are more likely to receive a terrible performance hit from
// unaligned accesses making this optimization questionable.
#if defined(TARGET_XARCH) || defined(TARGET_ARM64)
    if (!comp->opts.OptimizationEnabled())
    {
        return;
    }

    // For now, we require the current STOREIND to have LEA (previous store may not have it)
    // So we can easily adjust the offset, consider making it more flexible in future.
    if (!ind->Addr()->OperIs(GT_LEA))
    {
        return;
    }

    // We're going to do it in a loop while we see suitable STOREINDs to coalesce.
    // E.g.: we have the following LIR sequence:
    //
    //   ...addr nodes...
    // STOREIND(int)
    //   ...addr nodes...
    // STOREIND(short)
    //   ...addr nodes...
    // STOREIND(short) <-- we're here
    //
    // First we merge two 'short' stores, then we merge the result with the 'int' store
    // to get a single store of 8 bytes.
    do
    {
        // This check is not really needed, just for better throughput.
        if (!ind->TypeIs(TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT))
        {
            return;
        }

        StoreCoalescingData currData;
        StoreCoalescingData prevData;

        // Get coalescing data for the current STOREIND
        if (!GetStoreCoalescingData(comp, ind, &currData))
        {
            return;
        }

        bool isClosedRange = false;
        // Now we need to find the very first LIR node representing the current STOREIND
        // and make sure that there are no other unexpected nodes in-between.
        LIR::ReadOnlyRange currIndRange = BlockRange().GetTreeRange(ind, &isClosedRange);
        if (!isClosedRange)
        {
            return;
        }
        GenTree* prevTree = currIndRange.FirstNode()->gtPrev;
        // Now we need to find the previous STOREIND,
        // we can ignore any NOPs or IL_OFFSETs in-between
        while ((prevTree != nullptr) && prevTree->OperIs(GT_NOP, GT_IL_OFFSET))
        {
            prevTree = prevTree->gtPrev;
        }

        // It's not a STOREIND - bail out.
        if ((prevTree == nullptr) || !prevTree->OperIs(GT_STOREIND))
        {
            return;
        }

        // Get coalescing data for the previous STOREIND
        GenTreeStoreInd* prevInd = prevTree->AsStoreInd();
        if (!GetStoreCoalescingData(comp, prevInd, &prevData))
        {
            return;
        }

        // Same for the previous STOREIND, make sure there are no unexpected nodes around.
        LIR::ReadOnlyRange prevIndRange = BlockRange().GetTreeRange(prevInd, &isClosedRange);
        if (!isClosedRange)
        {
            return;
        }

        // STOREINDs aren't value nodes.
        LIR::Use use;
        assert(!BlockRange().TryGetUse(prevInd, &use) && !BlockRange().TryGetUse(ind, &use));

        // BaseAddr, Index, Scale and Type all have to match.
        if ((prevData.scale != currData.scale) || (prevData.targetType != currData.targetType) ||
            !GenTree::Compare(prevData.baseAddr, currData.baseAddr) ||
            !GenTree::Compare(prevData.index, currData.index))
        {
            return;
        }

        // Offset has to match the size of the type. We don't support the same or overlapping offsets.
        if (abs(prevData.offset - currData.offset) != (int)genTypeSize(prevData.targetType))
        {
            return;
        }

        // Since we're merging two stores of the same type, the new type is twice wider.
        var_types oldType = ind->TypeGet();
        var_types newType;
        switch (oldType)
        {
            case TYP_BYTE:
            case TYP_UBYTE:
                newType = TYP_USHORT;
                break;

            case TYP_SHORT:
            case TYP_USHORT:
                newType = TYP_INT; // TYP_UINT is not legal in IR
                break;

#ifdef TARGET_64BIT
            case TYP_INT:
                newType = TYP_LONG;
                break;
#endif // TARGET_64BIT

            // TYP_FLOAT and TYP_DOUBLE aren't needed here - they're expected to
            // be converted to TYP_INT/TYP_LONG for constant value.
            //
            // TODO-CQ:
            //   2 x LONG/REF  -> SIMD16
            //   2 x SIMD16    -> SIMD32
            //   2 x SIMD32    -> SIMD64
            //
            // where it's legal (e.g. SIMD is not atomic on x64)
            //
            default:
                return;
        }

        // Delete previous STOREIND entirely
        BlockRange().Remove(std::move(prevIndRange));

        // We know it's always LEA for now
        GenTreeAddrMode* addr = ind->Addr()->AsAddrMode();

        // Update offset to be the minimum of the two
        addr->SetOffset(min(prevData.offset, currData.offset));

        // Update type for both STOREIND and val
        ind->gtType         = newType;
        ind->Data()->gtType = newType;

        // We currently only support these constants for val
        assert(prevData.value->IsCnsIntOrI() && currData.value->IsCnsIntOrI());

        size_t lowerCns = (size_t)prevData.value->AsIntCon()->IconValue();
        size_t upperCns = (size_t)currData.value->AsIntCon()->IconValue();

        // if the previous store was at a higher address, swap the constants
        if (prevData.offset > currData.offset)
        {
            std::swap(lowerCns, upperCns);
        }

        // Trim the constants to the size of the type, e.g. for TYP_SHORT and TYP_USHORT
        // the mask will be 0xFFFF, for TYP_INT - 0xFFFFFFFF.
        size_t mask = ~(size_t(0)) >> (sizeof(size_t) - genTypeSize(oldType)) * BITS_IN_BYTE;
        lowerCns &= mask;
        upperCns &= mask;

        size_t val = (lowerCns | (upperCns << (genTypeSize(oldType) * BITS_IN_BYTE)));
        JITDUMP("Coalesced two stores into a single store with value %lld\n", (int64_t)val);

        // It's not expected to be contained yet, but just in case...
        ind->Data()->ClearContained();
        ind->Data()->AsIntCon()->gtIconVal = (ssize_t)val;
        // The wider store may straddle the original alignment boundary.
        ind->gtFlags |= GTF_IND_UNALIGNED;

    } while (true);
#endif // TARGET_XARCH || TARGET_ARM64
}

//------------------------------------------------------------------------
// LowerStoreIndirCommon: a common logic to lower StoreIndir.
//
Expand DownExpand Up@@ -7842,6 +8121,7 @@ void Lowering::LowerStoreIndirCommon(GenTreeStoreInd* ind)
}
#endif

LowerStoreIndirCoalescing(ind);
LowerStoreIndir(ind);
}
}
Expand Down
1 change: 1 addition & 0 deletionssrc/coreclr/jit/lower.h
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -312,6 +312,7 @@ class Lowering final : public Phase
voidLowerStoreIndirCommon(GenTreeStoreInd* ind);
voidLowerIndir(GenTreeIndir* ind);
voidLowerStoreIndir(GenTreeStoreInd* node);
voidLowerStoreIndirCoalescing(GenTreeStoreInd* node);
GenTree*LowerAdd(GenTreeOp* node);
GenTree*LowerMul(GenTreeOp* mul);
boolTryLowerAndNegativeOne(GenTreeOp* node, GenTree** nextNode);
Expand Down

[8]ページ先頭

©2009-2025 Movatter.jp