Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 2b0c1de

Browse files
authored
Use SIMD for block inits with GC fields (#102132)
1 parent 1f08a36, commit 2b0c1de

File tree

2 files changed

+103
-44
lines changed

2 files changed

+103
-44
lines changed

‎src/coreclr/jit/codegenxarch.cpp‎

Lines changed: 79 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -3231,6 +3231,18 @@ void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* node)
32313231
assert(size <= INT32_MAX);
32323232
assert(dstOffset < (INT32_MAX -static_cast<int>(size)));
32333233

3234+
auto emitStore = [&](instruction ins,unsigned width, regNumber target) {
3235+
if (dstLclNum != BAD_VAR_NUM)
3236+
{
3237+
emit->emitIns_S_R(ins,EA_ATTR(width), target, dstLclNum, dstOffset);
3238+
}
3239+
else
3240+
{
3241+
emit->emitIns_ARX_R(ins,EA_ATTR(width), target, dstAddrBaseReg, dstAddrIndexReg, dstAddrIndexScale,
3242+
dstOffset);
3243+
}
3244+
};
3245+
32343246
#ifdef FEATURE_SIMD
32353247
if (willUseSimdMov)
32363248
{
@@ -3244,18 +3256,6 @@ void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* node)
32443256
instruction simdMov =simdUnalignedMovIns();
32453257
unsigned bytesWritten =0;
32463258

3247-
auto emitSimdMovs = [&]() {
3248-
if (dstLclNum != BAD_VAR_NUM)
3249-
{
3250-
emit->emitIns_S_R(simdMov,EA_ATTR(regSize), srcXmmReg, dstLclNum, dstOffset);
3251-
}
3252-
else
3253-
{
3254-
emit->emitIns_ARX_R(simdMov,EA_ATTR(regSize), srcXmmReg, dstAddrBaseReg, dstAddrIndexReg,
3255-
dstAddrIndexScale, dstOffset);
3256-
}
3257-
};
3258-
32593259
while (bytesWritten < size)
32603260
{
32613261
if (bytesWritten + regSize > size)
@@ -3264,7 +3264,7 @@ void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* node)
32643264
break;
32653265
}
32663266

3267-
emitSimdMovs();
3267+
emitStore(simdMov, regSize, srcXmmReg);
32683268
dstOffset += regSize;
32693269
bytesWritten += regSize;
32703270
}
@@ -3279,10 +3279,71 @@ void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* node)
32793279

32803280
// Rewind dstOffset so we can fit a vector for the whole remainder
32813281
dstOffset -= (regSize - size);
3282-
emitSimdMovs();
3282+
emitStore(simdMov, regSize, srcXmmReg);
32833283
size =0;
32843284
}
32853285
}
3286+
elseif (node->IsOnHeapAndContainsReferences() && ((internalRegisters.GetAll(node) & RBM_ALLFLOAT) !=0))
3287+
{
3288+
// For blocks with GC refs we can still use SIMD, but only for contiguous
3289+
// non-GC parts where atomicity guarantees are not that strict.
3290+
assert(!willUseSimdMov);
3291+
ClassLayout* layout = node->GetLayout();
3292+
3293+
regNumber simdZeroReg = REG_NA;
3294+
unsigned slots = layout->GetSlotCount();
3295+
unsigned slot =0;
3296+
while (slot < slots)
3297+
{
3298+
if (!layout->IsGCPtr(slot))
3299+
{
3300+
// How many contiguous non-GC slots do we have?
3301+
unsigned nonGcSlotCount =0;
3302+
do
3303+
{
3304+
nonGcSlotCount++;
3305+
slot++;
3306+
}while ((slot < slots) && !layout->IsGCPtr(slot));
3307+
3308+
for (unsigned nonGcSlot =0; nonGcSlot < nonGcSlotCount; nonGcSlot++)
3309+
{
3310+
// Are the remaining contiguous non-GC slots enough to use SIMD?
3311+
unsigned simdSize = compiler->roundDownSIMDSize((nonGcSlotCount - nonGcSlot) * REGSIZE_BYTES);
3312+
if (simdSize >0)
3313+
{
3314+
// Initialize simdZeroReg with zero on demand
3315+
if (simdZeroReg == REG_NA)
3316+
{
3317+
simdZeroReg = internalRegisters.GetSingle(node, RBM_ALLFLOAT);
3318+
// SIMD16 is sufficient for any SIMD size
3319+
simd_t vecCon = {};
3320+
genSetRegToConst(simdZeroReg, TYP_SIMD16, &vecCon);
3321+
}
3322+
3323+
emitStore(simdUnalignedMovIns(), simdSize, simdZeroReg);
3324+
dstOffset += (int)simdSize;
3325+
nonGcSlot += (simdSize / REGSIZE_BYTES) -1;
3326+
}
3327+
else
3328+
{
3329+
emitStore(INS_mov, REGSIZE_BYTES, srcIntReg);
3330+
dstOffset += REGSIZE_BYTES;
3331+
}
3332+
}
3333+
}
3334+
else
3335+
{
3336+
// GC slot - update atomically
3337+
emitStore(INS_mov, REGSIZE_BYTES, srcIntReg);
3338+
dstOffset += REGSIZE_BYTES;
3339+
slot++;
3340+
}
3341+
}
3342+
3343+
// There are no trailing elements
3344+
assert((layout->GetSize() % TARGET_POINTER_SIZE) ==0);
3345+
size =0;
3346+
}
32863347
#endif// FEATURE_SIMD
32873348

32883349
assert((srcIntReg != REG_NA) || (size ==0));
@@ -3298,15 +3359,7 @@ void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* node)
32983359

32993360
for (; size > regSize; size -= regSize, dstOffset += regSize)
33003361
{
3301-
if (dstLclNum != BAD_VAR_NUM)
3302-
{
3303-
emit->emitIns_S_R(INS_mov,EA_ATTR(regSize), srcIntReg, dstLclNum, dstOffset);
3304-
}
3305-
else
3306-
{
3307-
emit->emitIns_ARX_R(INS_mov,EA_ATTR(regSize), srcIntReg, dstAddrBaseReg, dstAddrIndexReg,
3308-
dstAddrIndexScale, dstOffset);
3309-
}
3362+
emitStore(INS_mov, regSize, srcIntReg);
33103363
}
33113364

33123365
// Handle the non-SIMD remainder by overlapping with previously processed data if needed
@@ -3322,15 +3375,7 @@ void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* node)
33223375
assert(shiftBack <= regSize);
33233376
dstOffset -= shiftBack;
33243377

3325-
if (dstLclNum != BAD_VAR_NUM)
3326-
{
3327-
emit->emitIns_S_R(INS_mov,EA_ATTR(regSize), srcIntReg, dstLclNum, dstOffset);
3328-
}
3329-
else
3330-
{
3331-
emit->emitIns_ARX_R(INS_mov,EA_ATTR(regSize), srcIntReg, dstAddrBaseReg, dstAddrIndexReg,
3332-
dstAddrIndexScale, dstOffset);
3333-
}
3378+
emitStore(INS_mov, regSize, srcIntReg);
33343379
}
33353380
#else// TARGET_X86
33363381
for (unsigned regSize = REGSIZE_BYTES; size >0; size -= regSize, dstOffset += regSize)
@@ -3339,15 +3384,8 @@ void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* node)
33393384
{
33403385
regSize /=2;
33413386
}
3342-
if (dstLclNum != BAD_VAR_NUM)
3343-
{
3344-
emit->emitIns_S_R(INS_mov,EA_ATTR(regSize), srcIntReg, dstLclNum, dstOffset);
3345-
}
3346-
else
3347-
{
3348-
emit->emitIns_ARX_R(INS_mov,EA_ATTR(regSize), srcIntReg, dstAddrBaseReg, dstAddrIndexReg,
3349-
dstAddrIndexScale, dstOffset);
3350-
}
3387+
3388+
emitStore(INS_mov, regSize, srcIntReg);
33513389
}
33523390
#endif
33533391
}

‎src/coreclr/jit/lsraxarch.cpp‎

Lines changed: 24 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1430,9 +1430,30 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode)
14301430
{
14311431
case GenTreeBlk::BlkOpKindUnroll:
14321432
{
1433-
constbool canUse16BytesSimdMov =
1434-
!blkNode->IsOnHeapAndContainsReferences() && compiler->IsBaselineSimdIsaSupported();
1435-
constbool willUseSimdMov = canUse16BytesSimdMov && (size >= XMM_REGSIZE_BYTES);
1433+
bool willUseSimdMov = compiler->IsBaselineSimdIsaSupported() && (size >= XMM_REGSIZE_BYTES);
1434+
if (willUseSimdMov && blkNode->IsOnHeapAndContainsReferences())
1435+
{
1436+
ClassLayout* layout = blkNode->GetLayout();
1437+
1438+
unsigned xmmCandidates =0;
1439+
unsigned continuousNonGc =0;
1440+
for (unsigned slot =0; slot < layout->GetSlotCount(); slot++)
1441+
{
1442+
if (layout->IsGCPtr(slot))
1443+
{
1444+
xmmCandidates += ((continuousNonGc * TARGET_POINTER_SIZE) / XMM_REGSIZE_BYTES);
1445+
continuousNonGc =0;
1446+
}
1447+
else
1448+
{
1449+
continuousNonGc++;
1450+
}
1451+
}
1452+
xmmCandidates += ((continuousNonGc * TARGET_POINTER_SIZE) / XMM_REGSIZE_BYTES);
1453+
1454+
// Just one XMM candidate is not profitable
1455+
willUseSimdMov = xmmCandidates >1;
1456+
}
14361457

14371458
if (willUseSimdMov)
14381459
{

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp