Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 3a1570f

Browse files
authored
Use AVX512 to zero locals (#91166)
1 parent dc82287, commit 3a1570f

File tree

1 file changed

+17
-2
lines changed

1 file changed

+17
-2
lines changed

‎src/coreclr/jit/codegenxarch.cpp‎

Lines changed: 17 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -10894,9 +10894,12 @@ void CodeGen::genZeroInitFrameUsingBlockInit(int untrLclHi, int untrLclLo, regNu
1089410894
assert((blkSize + alignmentHiBlkSize) == (untrLclHi - untrLclLo));
1089510895
#endif// !defined(TARGET_AMD64)
1089610896

10897+
const int maxSimdSize = (int)compiler->roundDownSIMDSize(blkSize);
10898+
assert((maxSimdSize >= XMM_REGSIZE_BYTES) && (maxSimdSize <= ZMM_REGSIZE_BYTES));
10899+
1089710900
// The loop is unrolled 3 times so we do not move to the loop block until it
1089810901
// will loop at least once so the threshold is 6.
10899-
if (blkSize < (6 *XMM_REGSIZE_BYTES))
10902+
if (blkSize < (6 *maxSimdSize))
1090010903
{
1090110904
// Generate the following code:
1090210905
//
@@ -10905,10 +10908,22 @@ void CodeGen::genZeroInitFrameUsingBlockInit(int untrLclHi, int untrLclLo, regNu
1090510908
// ...
1090610909
// movups xmmword ptr [ebp/esp-OFFS], xmm4
1090710910
// mov qword ptr [ebp/esp-OFFS], rax
10908-
10911+
//
10912+
// NOTE: it implicitly zeroes YMM4 and ZMM4 as well.
1090910913
emit->emitIns_SIMD_R_R_R(INS_xorps, EA_16BYTE, zeroSIMDReg, zeroSIMDReg, zeroSIMDReg);
1091010914

1091110915
int i =0;
10916+
if (maxSimdSize > XMM_REGSIZE_BYTES)
10917+
{
10918+
for (; i <= blkSize - maxSimdSize; i += maxSimdSize)
10919+
{
10920+
// We previously aligned data to 16 bytes which might not be aligned to maxSimdSize
10921+
emit->emitIns_AR_R(simdUnalignedMovIns(),EA_ATTR(maxSimdSize), zeroSIMDReg, frameReg,
10922+
alignedLclLo + i);
10923+
}
10924+
// Remainder will be handled by the xmm loop below
10925+
}
10926+
1091210927
for (; i < blkSize; i += XMM_REGSIZE_BYTES)
1091310928
{
1091410929
emit->emitIns_AR_R(simdMov,EA_ATTR(XMM_REGSIZE_BYTES), zeroSIMDReg, frameReg, alignedLclLo + i);

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp