- Notifications
You must be signed in to change notification settings - Fork5.2k
JIT: Merge stores#92852
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to ourterms of service andprivacy statement. We’ll occasionally send you account related emails.
Already on GitHub?Sign in to your account
Merged
Uh oh!
There was an error while loading.Please reload this page.
Merged
JIT: Merge stores#92852
Changes from1 commit
Commits
Show all changes
18 commits Select commitHold shift + click to select a range
50d2929 STOREIND Coalescing
EgorBoeedb979 Remove short temporarily
EgorBobfe1765 Check that both indirs are unused
EgorBo6f2838e Enable short
EgorBo532266b Enable BYTE
EgorBo033a168 Update lower.cpp
EgorBo9b17c32 call multiple times
48a8f85 Clean up, disable for some platforms due to unaligned reads
EgorBoed568a6 Clean up
EgorBo4fbbeef Clean up
EgorBoc50200b Simplify
EgorBob9c3f6c Apply suggestions from code review
EgorBo7975250 Merge branch 'main' of github.com:dotnet/runtime into merge-stores
EgorBo3cc4585 Address feedback
EgorBof0dd22a Address feedback
EgorBo5c954fa Clean up
EgorBo62f9bed Address feedback
EgorBob2e4b1a Update src/coreclr/jit/lower.cpp
EgorBoFile filter
Filter by extension
Conversations
Failed to load comments.
Loading
Uh oh!
There was an error while loading.Please reload this page.
Jump to
Jump to file
Failed to load files.
Loading
Uh oh!
There was an error while loading.Please reload this page.
Diff view
Diff view
NextNext commit
STOREIND Coalescing
- Loading branch information
Uh oh!
There was an error while loading.Please reload this page.
commit50d29291bb8d008201ab53558753e154654db55c
There are no files selected for viewing
301 changes: 301 additions & 0 deletionssrc/coreclr/jit/lower.cpp
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -7802,6 +7802,306 @@ void Lowering::ContainCheckBitCast(GenTree* node) | ||
| } | ||
| } | ||
| struct StoreCoalescingData | ||
| { | ||
| var_types type; | ||
| GenTreeLclVarCommon* base; | ||
| GenTreeLclVarCommon* index; | ||
| uint32_t scale; | ||
| int offset; | ||
| GenTree* val; | ||
| }; | ||
| //------------------------------------------------------------------------ | ||
| // GetStoreCoalescingData: given a STOREIND node, get the data needed to perform | ||
| // store coalescing including pointer to the previous node. | ||
| // | ||
| // Arguments: | ||
| // ind - the STOREIND node | ||
| // data - [OUT] the data needed for store coalescing | ||
| // prevTree - [OUT] the pointer to the previous node | ||
| // | ||
| // Return Value: | ||
| // true if the data was successfully retrieved, false otherwise. | ||
| // Basically, false means that we definitely can't do store coalescing. | ||
| // | ||
| static bool GetStoreCoalescingData(GenTreeStoreInd* ind, StoreCoalescingData* data, GenTree** prevTree) | ||
| { | ||
| // Don't merge volatile stores. | ||
| if (ind->IsVolatile()) | ||
| { | ||
| return false; | ||
| } | ||
| // Data has to be constant. We might allow locals in the future. | ||
| if (!ind->Data()->IsCnsIntOrI()) | ||
| { | ||
| return false; | ||
| } | ||
| // We will need to walk previous trees (up to 4 nodes in a store) | ||
| // We already know that we have value and addr nodes, LEA might add more. | ||
| const int MaxTrees = 4; | ||
| GenTree* trees[MaxTrees] = {ind->Data(), ind->Addr(), nullptr, nullptr}; | ||
| int treesCount = 2; | ||
| data->type = ind->TypeGet(); | ||
| data->val = ind->Data(); | ||
| if (ind->Addr()->OperIs(GT_LEA)) | ||
| { | ||
| GenTree* base = ind->Addr()->AsAddrMode()->Base(); | ||
| GenTree* index = ind->Addr()->AsAddrMode()->Index(); | ||
| if ((base == nullptr) || !base->IsLocal()) | ||
| { | ||
| // Base must be a local. It's possible for it to be nullptr when index is not null, | ||
| // but let's ignore such cases. | ||
| return false; | ||
| } | ||
| if ((index != nullptr) && !index->IsLocal()) | ||
| { | ||
| // Index is either nullptr or a local. | ||
| return false; | ||
| } | ||
| data->base = base == nullptr ? nullptr : base->AsLclVarCommon(); | ||
| data->index = index == nullptr ? nullptr : index->AsLclVarCommon(); | ||
| data->scale = ind->Addr()->AsAddrMode()->GetScale(); | ||
| data->offset = ind->Addr()->AsAddrMode()->Offset(); | ||
| // Add index and base to the list of trees. | ||
| trees[treesCount++] = base; | ||
| if (index != nullptr) | ||
| { | ||
| trees[treesCount++] = index; | ||
| } | ||
| } | ||
| else if (ind->Addr()->OperIsLocal()) | ||
| { | ||
| // Address is just a local, no offset, scale is 1 | ||
| data->base = ind->Addr()->AsLclVarCommon(); | ||
| data->index = nullptr; | ||
| data->scale = 1; | ||
| data->offset = 0; | ||
| } | ||
| else | ||
| { | ||
| // Address is not LEA or local. | ||
| return false; | ||
| } | ||
| // We expect all the trees to be found in gtPrev (we don't care about the order, just make sure we don't have | ||
| // unexpected trees in-between) | ||
| GenTree* prev = ind->gtPrev; | ||
| for (int i = 0; i < treesCount; i++) | ||
| { | ||
| bool found = false; | ||
| // We don't need any O(1) lookups here, the array is too small | ||
| for (int j = 0; j < MaxTrees; j++) | ||
| { | ||
| if (trees[j] == prev) | ||
| { | ||
| trees[j] = nullptr; | ||
| found = true; | ||
| break; | ||
| } | ||
| } | ||
| if (!found) | ||
| { | ||
| // Unexpected node in-between. | ||
| return false; | ||
| } | ||
| prev = prev->gtPrev; | ||
| } | ||
| *prevTree = prev; | ||
EgorBo marked this conversation as resolved. OutdatedShow resolvedHide resolvedUh oh!There was an error while loading.Please reload this page. | ||
| return true; | ||
| } | ||
| //------------------------------------------------------------------------ | ||
| // CanBeCoalesced: given two store coalescing data, | ||
| // determine whether they can be coalesced. | ||
| // | ||
| // Arguments: | ||
| // data1 - the first store coalescing data | ||
| // data2 - the second store coalescing data (order is not important) | ||
| // | ||
| // Return Value: | ||
| // true if the data can be coalesced, false otherwise. | ||
| // | ||
| static bool CanBeCoalesced(StoreCoalescingData* data1, StoreCoalescingData* data2) | ||
| { | ||
| // base, index, scale and type all have to be the same | ||
| if ((data1->scale != data2->scale) || (data1->type != data2->type) || !GenTree::Compare(data1->base, data2->base) || | ||
| !GenTree::Compare(data1->index, data2->index)) | ||
| { | ||
| return false; | ||
| } | ||
| // val has to be a constant | ||
| if (!data1->val->OperIsConst() && !data2->val->OperIsConst()) | ||
| { | ||
| return false; | ||
| } | ||
| // Offset has to match the size of the type | ||
| // We don't support the same or overlapping offsets. | ||
| if (abs(data1->offset - data2->offset) != (int)genTypeSize(data1->type)) | ||
| { | ||
| return false; | ||
| } | ||
| return true; | ||
| } | ||
| //------------------------------------------------------------------------ | ||
| // LowerCoalescingWithPreviousInd: Try to coalesce a STOREIND with previous STOREINDs. Example: | ||
| // | ||
| // * STOREIND int | ||
| // +--* LCL_VAR byref V00 | ||
| // \--* CNS_INT int 0x1 | ||
| // | ||
| // * STOREIND int | ||
| // +--* LEA(b+4) byref | ||
| // | \--* LCL_VAR byref V00 | ||
| // \--* CNS_INT int 0x2 | ||
| // | ||
| // We can merge these two into into a single store of 8 bytes with (0x1 | (0x2 << 32)) as the value | ||
| // | ||
| // * STOREIND long | ||
| // +--* LEA(b+0) byref | ||
| // | \--* LCL_VAR byref V00 | ||
| // \--* CNS_INT long 0x200000001 | ||
| // | ||
| // Arguments: | ||
| // ind - the current STOREIND node | ||
| // | ||
| void Lowering::LowerCoalescingWithPreviousInd(GenTreeStoreInd* ind) | ||
| { | ||
| if (!comp->opts.OptimizationEnabled()) | ||
| { | ||
| return; | ||
| } | ||
| // For now, we require the current STOREIND to have LEA (previous store may not have it) | ||
| // TODO-CQ: Remove this restriction, we might even not need any LEAs at all for the final coalesced store. | ||
| if (!ind->Addr()->OperIs(GT_LEA)) | ||
| { | ||
| return; | ||
| } | ||
| // This check is not really needed, just for better throughput. | ||
| // We only support these types for the initial version. | ||
| if (ind->TypeIs(TYP_SHORT, TYP_USHORT, TYP_INT)) | ||
| { | ||
| return; | ||
| } | ||
| StoreCoalescingData currData; | ||
| StoreCoalescingData prevData; | ||
| GenTreeStoreInd* prevInd; | ||
| GenTree* prevTree; | ||
| // Get coalescing data for the current STOREIND | ||
| if (GetStoreCoalescingData(ind, &currData, &prevTree)) | ||
| { | ||
| while (prevTree != nullptr && prevTree->OperIs(GT_NOP, GT_IL_OFFSET)) | ||
| { | ||
| prevTree = prevTree->gtPrev; | ||
| } | ||
| if ((prevTree == nullptr) || !prevTree->OperIs(GT_STOREIND)) | ||
| { | ||
| return; | ||
| } | ||
| prevInd = prevTree->AsStoreInd(); | ||
| // Get coalescing data for the previous STOREIND | ||
| if (GetStoreCoalescingData(prevInd->AsStoreInd(), &prevData, &prevTree) && (prevTree != nullptr)) | ||
| { | ||
| // Check whether we can coalesce the two stores | ||
| if (CanBeCoalesced(&prevData, &currData)) | ||
| { | ||
| var_types oldType = ind->TypeGet(); | ||
| var_types newType; | ||
| switch (oldType) | ||
| { | ||
| case TYP_SHORT: | ||
| case TYP_USHORT: | ||
| newType = TYP_INT; | ||
| break; | ||
| #ifdef TARGET_64BIT | ||
| case TYP_INT: | ||
| newType = TYP_LONG; | ||
| break; | ||
| #endif | ||
| // TODO: BYTE, SIMD | ||
| default: | ||
| return; | ||
| } | ||
| // Delete previous STOREIND | ||
| BlockRange().Remove(prevTree->gtNext, prevInd); | ||
| // We know it's always LEA for now | ||
| GenTreeAddrMode* addr = ind->Addr()->AsAddrMode(); | ||
| // Take the smallest offset | ||
| addr->SetOffset(min(prevData.offset, currData.offset)); | ||
| // Make type twice bigger | ||
| ind->gtType = newType; | ||
| // We currently only support constants for val | ||
| assert(prevData.val->IsCnsIntOrI() && currData.val->IsCnsIntOrI()); | ||
| size_t lowerCns = (size_t)prevData.val->AsIntCon()->IconValue(); | ||
| size_t upperCns = (size_t)currData.val->AsIntCon()->IconValue(); | ||
| ind->Data()->gtType = newType; | ||
| // TP: if both are zero - we're done. | ||
| if ((lowerCns | upperCns) == 0) | ||
| { | ||
| JITDUMP("Coalesced two stores into a single store with value 0\n"); | ||
| } | ||
| else | ||
| { | ||
| // if the previous store was at a higher address, swap the constants | ||
| if (prevData.offset > currData.offset) | ||
| { | ||
| std::swap(lowerCns, upperCns); | ||
| } | ||
| // Trim the constants to the size of the type, e.g. for TYP_SHORT and TYP_USHORT | ||
| // the mask will be 0xFFFF, for TYP_INT - 0xFFFFFFFF. | ||
| size_t mask = ~0 >> (sizeof(size_t) - genTypeSize(oldType)) * BITS_IN_BYTE; | ||
| lowerCns &= mask; | ||
| upperCns &= mask; | ||
| ssize_t val = (ssize_t)(lowerCns | (upperCns << (genTypeSize(oldType) * BITS_IN_BYTE))); | ||
| JITDUMP("Coalesced two stores into a single store with value %lld\n", (int64_t)val); | ||
| // It's not expected to be contained yet, but just in case... | ||
| ind->Data()->ClearContained(); | ||
| ind->Data()->AsIntCon()->gtIconVal = val; | ||
| } | ||
| // Now check whether we can coalesce the new store with the previous one, e.g. we had: | ||
| // | ||
| // STOREIND(int) | ||
| // STOREIND(short) | ||
| // STOREIND(short) | ||
| // | ||
| // so we can coalesce the whole thing into a single store of 8 bytes. | ||
| LowerCoalescingWithPreviousInd(ind); | ||
| } | ||
| } | ||
| } | ||
| } | ||
| //------------------------------------------------------------------------ | ||
| // LowerStoreIndirCommon: a common logic to lower StoreIndir. | ||
| // | ||
| @@ -7842,6 +8142,7 @@ void Lowering::LowerStoreIndirCommon(GenTreeStoreInd* ind) | ||
| } | ||
| #endif | ||
| LowerCoalescingWithPreviousInd(ind); | ||
| LowerStoreIndir(ind); | ||
| } | ||
| } | ||
1 change: 1 addition & 0 deletionssrc/coreclr/jit/lower.h
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit.This suggestion is invalid because no changes were made to the code.Suggestions cannot be applied while the pull request is closed.Suggestions cannot be applied while viewing a subset of changes.Only one suggestion per line can be applied in a batch.Add this suggestion to a batch that can be applied as a single commit.Applying suggestions on deleted lines is not supported.You must change the existing code in this line in order to create a valid suggestion.Outdated suggestions cannot be applied.This suggestion has been applied or marked resolved.Suggestions cannot be applied from pending reviews.Suggestions cannot be applied on multi-line comments.Suggestions cannot be applied while the pull request is queued to merge.Suggestion cannot be applied right now. Please check back later.