Use SIMD for block inits with GC fields #102132


Merged
EgorBo merged 5 commits into dotnet:main from EgorBo:simd-blk-gc-init on May 15, 2024
Use SIMD for block inits with GC fields
@EgorBo committed May 12, 2024
commit f2c908959d8fc7b99019001de80d7ff6d38ec590
src/coreclr/jit/codegenxarch.cpp (120 changes: 79 additions & 41 deletions)
@@ -3231,6 +3231,18 @@ void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* node)
assert(size <= INT32_MAX);
assert(dstOffset < (INT32_MAX - static_cast<int>(size)));

auto emitStore = [&](instruction ins, unsigned width, regNumber target) {
if (dstLclNum != BAD_VAR_NUM)
{
emit->emitIns_S_R(ins, EA_ATTR(width), target, dstLclNum, dstOffset);
}
else
{
emit->emitIns_ARX_R(ins, EA_ATTR(width), target, dstAddrBaseReg, dstAddrIndexReg, dstAddrIndexScale,
dstOffset);
}
};

#ifdef FEATURE_SIMD
if (willUseSimdMov)
{
@@ -3244,18 +3256,6 @@ void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* node)
instruction simdMov = simdUnalignedMovIns();
unsigned bytesWritten = 0;

auto emitSimdMovs = [&]() {
if (dstLclNum != BAD_VAR_NUM)
{
emit->emitIns_S_R(simdMov, EA_ATTR(regSize), srcXmmReg, dstLclNum, dstOffset);
}
else
{
emit->emitIns_ARX_R(simdMov, EA_ATTR(regSize), srcXmmReg, dstAddrBaseReg, dstAddrIndexReg,
dstAddrIndexScale, dstOffset);
}
};

while (bytesWritten < size)
{
if (bytesWritten + regSize > size)
@@ -3264,7 +3264,7 @@ void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* node)
break;
}

emitSimdMovs();
emitStore(simdMov, regSize, srcXmmReg);
dstOffset += regSize;
bytesWritten += regSize;
}
@@ -3279,10 +3279,71 @@ void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* node)

// Rewind dstOffset so we can fit a vector for the whole remainder
dstOffset -= (regSize - size);
emitSimdMovs();
emitStore(simdMov, regSize, srcXmmReg);
size = 0;
}
}
else if (node->IsOnHeapAndContainsReferences() && ((internalRegisters.GetAll(node) & RBM_ALLFLOAT) != 0))
{
// For block with GC refs we still can use SIMD, but only for continuous
// non-GC parts where atomicity guarantees are not that strict.
assert(!willUseSimdMov);
ClassLayout* layout = node->GetLayout();
regNumber simdZeroReg = REG_NA;
unsigned slots = layout->GetSlotCount();
unsigned slot = 0;
while (slot < slots)
{
if (!layout->IsGCPtr(slot))
{
// How many continuous non-GC slots do we have?
unsigned nonGcSlotCount = 0;
do
{
nonGcSlotCount++;
slot++;
} while ((slot < slots) && !layout->IsGCPtr(slot));

for (unsigned nonGcSlot = 0; nonGcSlot < nonGcSlotCount; nonGcSlot++)
{
// Are continuous nongc slots enough to use SIMD?
unsigned simdSize = compiler->roundDownSIMDSize((nonGcSlotCount - nonGcSlot) * REGSIZE_BYTES);
if (simdSize > 0)
{
// Initialize simdZeroReg with zero on demand, it's enough to use TYP_SIMD16
// regardless of the SIMD size we're going to use.
if (simdZeroReg == REG_NA)
{
simdZeroReg = internalRegisters.GetSingle(node, RBM_ALLFLOAT);
simd_t vecCon;
memset(&vecCon, (uint8_t)src->AsIntCon()->IconValue(), sizeof(simd_t));
genSetRegToConst(simdZeroReg, TYP_SIMD16, &vecCon);
}

emitStore(simdUnalignedMovIns(), simdSize, simdZeroReg);
dstOffset += (int)simdSize;
nonGcSlot += (simdSize / REGSIZE_BYTES) - 1;
}
else
{
emitStore(INS_mov, REGSIZE_BYTES, srcIntReg);
dstOffset += REGSIZE_BYTES;
}
}
}
else
{
// GC slot - update atomically
emitStore(INS_mov, REGSIZE_BYTES, srcIntReg);
dstOffset += REGSIZE_BYTES;
slot++;
}
}

// There are no trailing elements
assert((layout->GetSize() % TARGET_POINTER_SIZE) == 0);
size = 0;
}
#endif // FEATURE_SIMD

assert((srcIntReg != REG_NA) || (size == 0));
@@ -3298,15 +3359,7 @@ void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* node)

for (; size > regSize; size -= regSize, dstOffset += regSize)
{
if (dstLclNum != BAD_VAR_NUM)
{
emit->emitIns_S_R(INS_mov, EA_ATTR(regSize), srcIntReg, dstLclNum, dstOffset);
}
else
{
emit->emitIns_ARX_R(INS_mov, EA_ATTR(regSize), srcIntReg, dstAddrBaseReg, dstAddrIndexReg,
dstAddrIndexScale, dstOffset);
}
emitStore(INS_mov, regSize, srcIntReg);
}

// Handle the non-SIMD remainder by overlapping with previously processed data if needed
@@ -3322,15 +3375,7 @@ void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* node)
assert(shiftBack <= regSize);
dstOffset -= shiftBack;

if (dstLclNum != BAD_VAR_NUM)
{
emit->emitIns_S_R(INS_mov, EA_ATTR(regSize), srcIntReg, dstLclNum, dstOffset);
}
else
{
emit->emitIns_ARX_R(INS_mov, EA_ATTR(regSize), srcIntReg, dstAddrBaseReg, dstAddrIndexReg,
dstAddrIndexScale, dstOffset);
}
emitStore(INS_mov, regSize, srcIntReg);
}
#else // TARGET_X86
for (unsigned regSize = REGSIZE_BYTES; size > 0; size -= regSize, dstOffset += regSize)
@@ -3339,15 +3384,8 @@ void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* node)
{
regSize /= 2;
}
if (dstLclNum != BAD_VAR_NUM)
{
emit->emitIns_S_R(INS_mov, EA_ATTR(regSize), srcIntReg, dstLclNum, dstOffset);
}
else
{
emit->emitIns_ARX_R(INS_mov, EA_ATTR(regSize), srcIntReg, dstAddrBaseReg, dstAddrIndexReg,
dstAddrIndexScale, dstOffset);
}

emitStore(INS_mov, regSize, srcIntReg);
}
#endif
}
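To make the store-selection logic in the codegenxarch.cpp hunks above easier to follow, here is a minimal standalone sketch (not JIT code) of the same idea. The `gcMask` vector and `roundDownSimdSize` helper are hypothetical stand-ins for `ClassLayout::IsGCPtr` and `Compiler::roundDownSIMDSize`, and the sketch assumes 8-byte slots with only 16-byte XMM stores available: each contiguous non-GC run is covered by the widest SIMD store that fits, while every GC slot gets a single pointer-sized store so the update stays atomic.

```cpp
#include <cstdio>
#include <vector>

// Hypothetical stand-ins for the JIT concepts used in the diff above:
//  - gcMask[slot]       plays the role of ClassLayout::IsGCPtr(slot)
//  - roundDownSimdSize  mimics Compiler::roundDownSIMDSize, assuming only
//                       16-byte XMM stores are available
constexpr unsigned kSlotSize = 8; // TARGET_POINTER_SIZE on x64

static unsigned roundDownSimdSize(unsigned bytes)
{
    return (bytes >= 16) ? 16u : 0u;
}

static void planBlockInit(const std::vector<bool>& gcMask)
{
    unsigned       slot   = 0;
    unsigned       offset = 0;
    const unsigned slots  = (unsigned)gcMask.size();
    while (slot < slots)
    {
        if (!gcMask[slot])
        {
            // Measure the run of contiguous non-GC slots.
            unsigned run = 0;
            do
            {
                run++;
                slot++;
            } while ((slot < slots) && !gcMask[slot]);

            for (unsigned i = 0; i < run;)
            {
                unsigned simdSize = roundDownSimdSize((run - i) * kSlotSize);
                if (simdSize > 0)
                {
                    printf("simd store: %2u bytes at offset %u\n", simdSize, offset);
                    offset += simdSize;
                    i += simdSize / kSlotSize;
                }
                else
                {
                    printf("mov store:  %2u bytes at offset %u\n", kSlotSize, offset);
                    offset += kSlotSize;
                    i++;
                }
            }
        }
        else
        {
            // GC slot: a single pointer-sized store keeps the update atomic.
            printf("mov store (GC ref): %u bytes at offset %u\n", kSlotSize, offset);
            offset += kSlotSize;
            slot++;
        }
    }
}

int main()
{
    // Layout resembling { object; long; long; long; long; object }:
    // expect a GC mov, two 16-byte SIMD stores, then another GC mov.
    planBlockInit({true, false, false, false, false, true});
    return 0;
}
```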
src/coreclr/jit/lsraxarch.cpp (34 changes: 31 additions & 3 deletions)
@@ -1430,9 +1430,37 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode)
{
case GenTreeBlk::BlkOpKindUnroll:
{
const bool canUse16BytesSimdMov =
!blkNode->IsOnHeapAndContainsReferences() && compiler->IsBaselineSimdIsaSupported();
const bool willUseSimdMov = canUse16BytesSimdMov && (size >= XMM_REGSIZE_BYTES);
const bool canUse16BytesSimdMov = compiler->IsBaselineSimdIsaSupported();
bool willUseSimdMov = canUse16BytesSimdMov && (size >= XMM_REGSIZE_BYTES);
if (willUseSimdMov && blkNode->IsOnHeapAndContainsReferences())
{
ClassLayout* layout = blkNode->GetLayout();
unsigned slots = layout->GetSlotCount();
unsigned slot = 0;
unsigned simds = 0;
while (slot < slots)
{
if (!layout->IsGCPtr(slot))
{
// How many continuous non-GC slots do we have?
unsigned nonGcSlotCount = 0;
do
{
nonGcSlotCount++;
slot++;
} while ((slot < slots) && !layout->IsGCPtr(slot));
simds += (nonGcSlotCount * TARGET_POINTER_SIZE) / XMM_REGSIZE_BYTES;
}
else
{
// GC slot
slot++;
}
}

// Just one XMM candidate is not profitable
willUseSimdMov = simds > 1;
}

if (willUseSimdMov)
{
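For the lsraxarch.cpp change, here is a tiny sketch of the profitability check under the same assumptions (8-byte slots, 16-byte XMM stores; `gcMask` is again a hypothetical stand-in for `ClassLayout::IsGCPtr`): it counts how many full XMM-sized stores the contiguous non-GC runs could absorb and only treats the SIMD path, and hence the extra internal float register, as worthwhile when there is more than one.

```cpp
#include <vector>

// Minimal sketch of the LSRA heuristic above, assuming 8-byte slots and
// 16-byte XMM stores; gcMask is a hypothetical stand-in for ClassLayout::IsGCPtr.
static bool simdWorthwhile(const std::vector<bool>& gcMask)
{
    const unsigned slotSize = 8;  // TARGET_POINTER_SIZE on x64
    const unsigned xmmSize  = 16; // XMM_REGSIZE_BYTES
    unsigned       simds    = 0;
    unsigned       slot     = 0;
    while (slot < gcMask.size())
    {
        if (!gcMask[slot])
        {
            // Count the contiguous non-GC run, then the XMM chunks it can hold.
            unsigned run = 0;
            do
            {
                run++;
                slot++;
            } while ((slot < gcMask.size()) && !gcMask[slot]);
            simds += (run * slotSize) / xmmSize;
        }
        else
        {
            slot++; // GC slot: never covered by a SIMD store
        }
    }
    // Just one XMM candidate is not worth reserving an internal float register.
    return simds > 1;
}

// Example calls:
//   simdWorthwhile({true, false, false, true})               -> false (one 16-byte chunk)
//   simdWorthwhile({true, false, false, false, false, true}) -> true  (two 16-byte chunks)
```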

[8]ページ先頭

©2009-2025 Movatter.jp